mirror of
https://github.com/compiler-explorer/compiler-explorer.git
synced 2025-12-27 07:04:04 -05:00
309 lines
12 KiB
TypeScript
309 lines
12 KiB
TypeScript
// Copyright (c) 2024, Compiler Explorer Authors
|
|
// All rights reserved.
|
|
//
|
|
// Redistribution and use in source and binary forms, with or without
|
|
// modification, are permitted provided that the following conditions are met:
|
|
//
|
|
// * Redistributions of source code must retain the above copyright notice,
|
|
// this list of conditions and the following disclaimer.
|
|
// * Redistributions in binary form must reproduce the above copyright
|
|
// notice, this list of conditions and the following disclaimer in the
|
|
// documentation and/or other materials provided with the distribution.
|
|
//
|
|
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
|
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
// POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
import _ from 'underscore';
|
|
|
|
import {EdgeColor} from '../../../types/compilation/cfg.interfaces.js';
|
|
import {logger} from '../../logger.js';
|
|
import {BaseInstructionSetInfo, InstructionType} from '../instruction-sets/base.js';
|
|
|
|
import {AssemblyLine, BBRange, BaseCFGParser, CanonicalBB, Edge, Range} from './base.js';
|
|
|
|
// This currently only covers the default arm64 output. To support dex2oat's
|
|
// other ISAs, we just need to make sure the correct isJmpInstruction() is being
|
|
// used, and parse out the destination addrs differently. For example:
|
|
// - x86/x86_64 0x00004071 jmp +34 (0x00004098)
|
|
// - arm i 0x00004052 b 0x00004078
|
|
// - riscv64 0x0000411c bltz t6, +40 ; 0x00004144
|
|
// Control-flow-graph parser for dex2oat disassembly.
//
// Every instruction line is expected to look like `<pc> <opcode> <operands>`,
// and branch destinations are read from a trailing `(addr 0x...)` annotation.
// Nodes are named after the (zero-stripped) address of their first
// instruction, and functions are separated by empty lines.
export class OatCFGParser extends BaseCFGParser {
    // Copy of the assembly most recently handed to generateFunctionCfg().
    code: AssemblyLine[];

    // Captures the hex destination out of an `(addr 0x...)` annotation.
    jmpAddrRegex: RegExp;
    // Matches a hex literal with leading zeros; group 1 is whatever follows them.
    hexRegex: RegExp;

    constructor(instructionSetInfo: BaseInstructionSetInfo) {
        super(instructionSetInfo);
        this.code = [];
        // Only hex digits are captured, so any text following the annotation
        // (including further parentheses) cannot leak into the address.
        this.jmpAddrRegex = /\(addr (0x[\da-fA-F]+)\)/;
        this.hexRegex = /0x0+(.*)/;
    }

    static override get key() {
        return 'oat';
    }

    // Generally the same as the base filterData(), but we keep empty lines
    // because there are no other indicators for the end of a function.
    override filterData(assembly: AssemblyLine[]) {
        const isCode = (x: AssemblyLine) => x && (x.source !== null || this.isFunctionName(x));
        return this.filterTextSection(assembly)
            .map(line => _.clone(line))
            .filter(isCode);
    }

    // For filtering through the 'Instruction set', 'Instruction set features',
    // and 'Compiler filter' lines at the top of the compiler output.
    isHeaderInfo(text: string) {
        return text.startsWith('Instruction ') || text.startsWith('Compiler ');
    }

    // Uses the same general flow as the overridden function, but accounts for
    // empty spaces between functions. Returns one half-open [start, end) range
    // per function; a range's first line is the function name.
    override splitToFunctions(asmArr: AssemblyLine[]) {
        const result: Range[] = [];
        const last = asmArr.length;

        // Skip leading blank lines and header info. The bounds check matters:
        // output consisting solely of such lines must yield no functions
        // instead of indexing past the end of the array.
        let first = 0;
        while (first < last && (!asmArr[first].text || this.isHeaderInfo(asmArr[first].text))) {
            ++first;
        }

        let start = first;
        for (let i = first; i < last; ++i) {
            if (this.isFunctionEnd(asmArr[i].text)) {
                // Consecutive separators produce empty ranges; drop those.
                if (i > start) result.push({start, end: i});
                start = i + 1;
            }
        }

        // The final function is not necessarily followed by a separator.
        if (last > start) result.push({start, end: last});

        return result;
    }

    // Returns the shortened, lower-cased branch destination, or '' when the
    // line carries no `(addr ...)` annotation.
    // In these examples, '0x416c' and '0x8074' will be returned.
    //     0x00004144    b #+0x28 (addr 0x416c)
    //     0x00008050    b.hs #+0x24 (addr 0x00008074)
    getJmpAddr(inst: string): string {
        const match = this.jmpAddrRegex.exec(inst);
        // Lower-cased so the result compares equal to what getPc() produces.
        return match ? this.shortenHex(match[1].toLowerCase()) : '';
    }

    // Returns the first whitespace-separated token, lower-cased.
    // In this example, '0x0000416c' will be returned.
    //     0x0000416c    ret
    getPc(inst: string) {
        return inst.trim().split(/\s+/)[0].toLowerCase();
    }

    // Returns the second whitespace-separated token, lower-cased, or '' when
    // the line has fewer than two tokens.
    // In this example, 'add' will be returned.
    //     0x00004168    add w0, w1, #0x3 (3)
    getOpcode(inst: string) {
        const tokens = inst.trim().split(/\s+/);
        return (tokens[1] ?? '').toLowerCase();
    }

    // True when the address of `inst` is one of the collected branch targets.
    isJmpTarget(inst: string, jmpAddrs: string[]) {
        return jmpAddrs.includes(this.shortenHex(this.getPc(inst)));
    }

    // Strips the zeros directly following '0x'; input without such zeros is
    // returned unchanged.
    // '0x00004168' -> '0x4168'
    shortenHex(pc: string) {
        const match = pc.match(this.hexRegex);
        if (match) return '0x' + match[1];
        return pc;
    }

    // Splits one function's range into basic blocks. Each block records, in
    // `actionPos`, the indices of the jump instructions it contains; those are
    // split further by splitToCanonicalBasicBlockOat().
    override splitToBasicBlocks(asmArr: AssemblyLine[], range: Range) {
        const last = range.end;
        // range.start is the function name; we want blocks' ranges to begin
        // with the first instruction.
        const firstInst = range.start + 1;
        // An empty range, or a name with no instructions, has no blocks.
        if (firstInst >= last) return [];

        // Collect branch targets so we know where to start new blocks.
        const jmpAddrs: string[] = [];
        for (let i = range.start; i < last; ++i) {
            const addr = this.getJmpAddr(asmArr[i].text);
            if (addr) jmpAddrs.push(addr);
        }

        const openBlock = (nameId: string, start: number): BBRange => ({nameId, start, end: 0, actionPos: []});

        const result: BBRange[] = [];
        let block = openBlock(this.extractNodeName(asmArr[firstInst].text), firstInst);
        const closeBlock = (end: number) => {
            result.push({...block, end});
        };

        for (let i = firstInst; i < last; ++i) {
            const inst = asmArr[i].text;
            const prevInst = asmArr[i - 1]?.text ?? '';

            if (this.isBasicBlockEnd(inst, prevInst)) {
                // NOTE(review): isBasicBlockEnd() is inherited and not visible
                // here. As before, the next block starts *after* this line but
                // is named after it - confirm that is intended for OAT output.
                closeBlock(i);
                block = openBlock(this.extractNodeName(inst), i + 1);
                continue;
            }

            const nameId = this.extractNodeName(inst);
            // Splitting at the very first line of an identically named block
            // would only emit an empty block with a duplicate node id.
            const wouldDuplicate = i === block.start && block.nameId === nameId;
            if (this.isJmpTarget(inst, jmpAddrs) && !wouldDuplicate) {
                closeBlock(i);
                block = openBlock(nameId, i);
            }

            // Checked independently of the target test above: a jump that is
            // itself a branch target must still begin a block of its own,
            // otherwise edges aimed at it would reference a missing node.
            if (this.instructionSetInfo.isJmpInstruction(this.getOpcode(inst))) {
                block.actionPos.push(i);
            }
        }

        closeBlock(last);
        return result;
    }

    // Empty lines indicate the end of a function.
    override isFunctionEnd(text: string) {
        return text.trim().length === 0;
    }

    // All nodes are named after the address of their first instruction.
    override extractNodeName(inst: string) {
        return this.shortenHex(this.getPc(inst));
    }

    // Same idea as the base splitToCanonicalBasicBlock(), but with a different
    // node naming scheme: the block is cut after every jump recorded in
    // `actionPos`, and each piece after the first is named after the address
    // of its first instruction (looked up in `code`).
    splitToCanonicalBasicBlockOat(code: AssemblyLine[], basicBlock: BBRange): CanonicalBB[] {
        const actionPos = basicBlock.actionPos;
        let cuts = actionPos.length;
        // A jump that is already the last instruction of the block needs no
        // cut after it.
        if (cuts > 0 && actionPos[cuts - 1] + 1 === basicBlock.end) {
            --cuts;
        }

        const result: CanonicalBB[] = [];
        let nameId = basicBlock.nameId;
        let start = basicBlock.start;
        for (let i = 0; i < cuts; ++i) {
            const end = actionPos[i] + 1;
            result.push({nameId, start, end});
            start = end;
            nameId = this.extractNodeName(code[start].text);
        }
        result.push({nameId, start, end: basicBlock.end});

        return result;
    }

    // Builds the edges leaving each canonical block, based on the type of the
    // block's last instruction. Edges whose destination cannot be determined
    // (no `(addr ...)` annotation, or nothing following the block) are omitted.
    override makeEdges(asmArr: AssemblyLine[], arrOfCanonicalBasicBlock: CanonicalBB[]) {
        const edges: Edge[] = [];

        const addEdge = (sourceNode: string, targetNode: string, color: EdgeColor) => {
            if (!targetNode) return;
            edges.push({
                from: sourceNode,
                to: targetNode,
                arrows: 'to',
                color: color,
            });
        };

        // Name of the node starting right after `bb`, or '' if there is none
        // (end of input, or an empty line separating functions).
        const fallThroughOf = (bb: CanonicalBB) => {
            const next = asmArr[bb.end];
            return next ? this.extractNodeName(next.text) : '';
        };

        for (const bb of arrOfCanonicalBasicBlock) {
            const lastInst = asmArr[bb.end - 1]?.text ?? '';
            const opcode = this.getOpcode(lastInst);
            switch (this.instructionSetInfo.getInstructionType(opcode)) {
                case InstructionType.jmp: {
                    addEdge(bb.nameId, this.getJmpAddr(lastInst), 'blue');
                    break;
                }
                case InstructionType.conditionalJmpInst: {
                    // Branch taken
                    addEdge(bb.nameId, this.getJmpAddr(lastInst), 'green');
                    // Branch not taken
                    addEdge(bb.nameId, fallThroughOf(bb), 'red');
                    break;
                }
                case InstructionType.notRetInst: {
                    // No jmp, but the next instruction is in a different basic
                    // block because it is the target of another jmp.
                    addEdge(bb.nameId, fallThroughOf(bb), 'grey');
                    break;
                }
                case InstructionType.retInst: {
                    break;
                }
            }
        }
        logger.debug(edges);
        return edges;
    }

    // Produces the nodes and edges for a single function's range.
    override generateFunctionCfg(code: AssemblyLine[], fn: Range) {
        this.code = _.clone(code);
        // We don't want to use the base class's split method.
        const arrOfCanonicalBasicBlock = this.splitToBasicBlocks(code, fn).flatMap(bb =>
            this.splitToCanonicalBasicBlockOat(code, bb),
        );
        return {
            nodes: this.makeNodes(code, arrOfCanonicalBasicBlock),
            edges: this.makeEdges(code, arrOfCanonicalBasicBlock),
        };
    }
}
|