mirror of
https://github.com/compiler-explorer/compiler-explorer.git
synced 2025-12-27 07:04:04 -05:00
368 lines
15 KiB
TypeScript
368 lines
15 KiB
TypeScript
// Copyright (c) 2023, Compiler Explorer Authors
|
|
// All rights reserved.
|
|
//
|
|
// Redistribution and use in source and binary forms, with or without
|
|
// modification, are permitted provided that the following conditions are met:
|
|
//
|
|
// * Redistributions of source code must retain the above copyright notice,
|
|
// this list of conditions and the following disclaimer.
|
|
// * Redistributions in binary form must reproduce the above copyright
|
|
// notice, this list of conditions and the following disclaimer in the
|
|
// documentation and/or other materials provided with the distribution.
|
|
//
|
|
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
|
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
// POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
import {assert, unwrap} from '../../assert.js';
|
|
import {SentryCapture} from '../../sentry.js';
|
|
import {BaseInstructionSetInfo} from '../instruction-sets/base.js';
|
|
|
|
import {AssemblyLine, BaseCFGParser, Edge, Node, Range} from './base.js';
|
|
|
|
/**
 * Line span of one LLVM IR basic block inside the assembly listing, together
 * with the pieces needed to build its node id and label.
 */
export type BBRange = {
    /** Text put in front of the block name in the node label; used to encode the function name in the first block. */
    namePrefix: string;
    /** Block label as written in the IR; used as the node id and as the source of outgoing edges. */
    nameId: string;
    /** Index of the first line that belongs to the block (the line after the `define` / label line). */
    start: number;
    /** Index one past the last line of the block (exclusive). */
    end: number;
};
|
|
|
|
/**
 * Control-flow-graph parser for textual LLVM IR.
 *
 * Functions are located by their `define` line, split into basic blocks at
 * label lines, and edges are derived from the terminator instruction that ends
 * each block.
 */
export class LlvmIrCfgParser extends BaseCFGParser {
    /** Matches a `define ... @name(` line; capture 1 is the (possibly quoted) function name. */
    functionDefinition: RegExp;
    /** Matches a basic block label line (`name:` plus an optional `; comment`); capture 1 is the label. */
    labelRe: RegExp;
    /** Global regex matching every `%name` reference in a piece of text; capture 1 is the name without `%`. */
    labelReference: RegExp;

    static override get key() {
        return 'llvm';
    }

    constructor(instructionSetInfo: BaseInstructionSetInfo) {
        super(instructionSetInfo);
        this.functionDefinition = /^define .+ @("?[^"]+"?)\(/;
        this.labelRe = /^("?[\w$.-]+"?):\s*(;.*)?$/;
        this.labelReference = /%("?[\w$.-]+"?)/g;
    }

    /** LLVM IR needs no pre-filtering: the listing is returned unchanged. */
    override filterData(asmArr: AssemblyLine[]) {
        return asmArr;
    }

    /**
     * Finds every function in the listing.
     *
     * @param asmArr the full listing
     * @returns one range per function; `start` is the index of the `define`
     *     line and `end` is the index of the closing `}` line (or
     *     `asmArr.length` when the listing ends before a `}` is seen)
     */
    override splitToFunctions(asmArr: AssemblyLine[]) {
        const result: Range[] = [];
        let i = 0;
        while (i < asmArr.length) {
            if (this.functionDefinition.test(asmArr[i].text)) {
                const start = i;
                do {
                    i++;
                } while (i < asmArr.length && asmArr[i].text !== '}');

                // start is the function define, end is the closing brace
                result.push({start, end: i});
            }
            i++;
        }
        return result;
    }

    /**
     * Splits one function into basic blocks at its label lines.
     *
     * The first block of the function carries the function name (followed by a
     * blank line) as its `namePrefix`; all later blocks have an empty prefix.
     * A function that consists of a single block keeps the prefix on that
     * block, so the function name is never dropped from the graph.
     *
     * @param code the full listing
     * @param fn range of the function as produced by `splitToFunctions`
     * @returns the blocks in source order; each `start` is the line after the
     *     `define` / label line and each `end` is exclusive
     */
    splitToLlvmBasicBlocks(code: AssemblyLine[], fn: Range): BBRange[] {
        const fMatch = code[fn.start].text.match(this.functionDefinition);
        const fnName = unwrap(fMatch)[1];
        const result: BBRange[] = [];
        let i = fn.start + 1;
        let bbStart = i;
        let currentName = '';
        // Consumed (reset to '') as soon as the first block has been emitted.
        let namePrefix: string = fnName + '\n\n';
        while (i < fn.end) {
            const match = code[i].text.match(this.labelRe);
            if (match) {
                const label = match[1];
                if (bbStart === i) {
                    // A label directly after the `define` line names the entry block itself.
                    assert(result.length === 0);
                    currentName = label;
                } else {
                    // start is the fn / label define, end is exclusive
                    result.push({
                        namePrefix,
                        nameId: currentName,
                        start: bbStart,
                        end: i,
                    });
                    currentName = label;
                    namePrefix = '';
                }
                bbStart = i + 1;
            }
            i++;
        }
        // Final block. `namePrefix` is still set only when no block was emitted above, i.e. when this
        // is also the first block of the function.
        result.push({
            namePrefix,
            nameId: currentName,
            start: bbStart,
            end: i,
        });
        return result;
    }

    /**
     * Builds one graph node per basic block.
     *
     * @param asms the full listing
     * @param canonicalBasicBlocks blocks from `splitToLlvmBasicBlocks`
     * @returns nodes whose `id` is the block name and whose `label` is the
     *     prefix, the block name (with a `:` appended unless the name already
     *     contains one) and the block's instructions
     */
    makeLlvmNodes(asms: AssemblyLine[], canonicalBasicBlocks: BBRange[]): Node[] {
        return canonicalBasicBlocks.map(e => {
            // Trim newlines at the end of a BB
            let end = e.end;
            while (end > e.start && asms[end - 1].text === '') {
                end--;
            }
            const separator = e.nameId.includes(':') ? '' : ':';
            const instructions = this.concatInstructions(asms, e.start, end);
            return {
                id: e.nameId,
                label: `${e.namePrefix}${e.nameId}${separator}\n${instructions}`,
            };
        });
    }

    /**
     * Returns the text of the instruction that terminates a block, joining the
     * lines of the few terminators LLVM is known to print over several lines.
     *
     * @param asmArr the full listing
     * @param bb the block being inspected
     * @param lastInst index of the block's last non-empty line; must be `>= bb.start`
     */
    private getTerminatingInstruction(asmArr: AssemblyLine[], bb: BBRange, lastInst: number): string {
        const lastText = asmArr[lastInst].text;

        if (lastText.trim().startsWith(']')) {
            // Llvm likes to split switches over multiple lines:
            //   switch i32 %0, label %5 [
            //     i32 14, label %7
            //     i32 60, label %2
            //     i32 12, label %3
            //     i32 35, label %"core::Result<&[u8]>::exit53"
            //     i32 4, label %4
            //   ], !dbg !60
            // Search upwards for the opening line, but never leave the current block.
            let first = lastInst - 1;
            while (first >= bb.start && !asmArr[first].text.trim().startsWith('switch')) {
                first--;
            }
            if (first >= bb.start) {
                return this.concatInstructions(asmArr, first, lastInst + 1);
            }
            // No `switch` in this block: hand back the `]` line so the caller reports it as unexpected.
            return lastText;
        }

        // The remaining cases are two-line instructions; the previous line must belong to this block.
        if (lastInst > bb.start) {
            const previousText = asmArr[lastInst - 1].text;

            if (lastText.includes('unwind label') && previousText.trim().includes('invoke ')) {
                // Handle multi-line `invoke` like:
                //   invoke void @__cxa_throw(ptr nonnull %exception, ptr nonnull @typeinfo for int, ptr null) #3
                //           to label %unreachable unwind label %lpad
                return this.concatInstructions(asmArr, lastInst - 1, lastInst + 1);
            }
            if (previousText.includes('landingpad') && lastText.includes('catch')) {
                // Handle multi-line `landingpad` like:
                //   %0 = landingpad { ptr, i32 }
                //           catch ptr null
                return this.concatInstructions(asmArr, lastInst - 1, lastInst + 1);
            }
            if (previousText.includes('callbr') && lastText.trim().startsWith('to label')) {
                // Handle multi-line `callbr` like:
                //   %2 = callbr i32 asm "mov ${1:l}, $0", "=r,!i,~{dirflag},~{fpsr},~{flags}"() #2
                //           to label %asm.fallthrough1 [label %err.split2]
                return this.concatInstructions(asmArr, lastInst - 1, lastInst + 1);
            }
        }

        return lastText;
    }

    /**
     * Derives the outgoing edges of every basic block from its terminator.
     *
     * Edge colours: an unconditional `br`, `switch`, `indirectbr` and the
     * indirect targets of `callbr` are blue; the true / false targets of a
     * conditional `br` are green / red; the normal / unwind targets of
     * `invoke` are green / grey; the fallthrough target of `callbr` is grey.
     * `ret`, `unreachable`, `resume`, `catchswitch`, `catchret` and
     * `cleanupret` currently produce no edges.
     *
     * @param asmArr the full listing
     * @param canonicalBasicBlocks blocks from `splitToLlvmBasicBlocks`
     * @returns all edges, in block order
     * @throws Error when a non-empty block ends in an unrecognised instruction
     */
    makeLlvmEdges(asmArr: AssemblyLine[], canonicalBasicBlocks: BBRange[]) {
        const edges: Edge[] = [];
        const addEdge = (from: Edge['from'], to: Edge['to'], color: Edge['color']) => {
            edges.push({from, to, arrows: 'to', color});
        };

        for (const bb of canonicalBasicBlocks) {
            // Find the last instruction in the basic block. I think asmArr[bb.end] is always an empty line (except for
            // the last basic block) but this is just in case.
            let lastInst = bb.end - 1;
            while (lastInst >= bb.start && asmArr[lastInst].text === '') {
                lastInst--;
            }

            if (lastInst < bb.start) {
                // The block is empty, which can happen for the entry block. There is no terminator to look at;
                // reading the preceding `define` / label line instead could be mistaken for one.
                continue;
            }

            // Ad-hoc handling of a few known cases where LLVM splits a single instruction over multiple lines.
            const terminatingInstruction = this.getTerminatingInstruction(asmArr, bb, lastInst);

            let terminator: string;
            if (terminatingInstruction.includes('invoke ')) {
                // `invoke` / `callbr` may be preceded by `%x = `, so they cannot be found as the first word.
                terminator = 'invoke';
            } else if (terminatingInstruction.includes('callbr')) {
                terminator = 'callbr';
            } else {
                terminator = terminatingInstruction.trim().split(' ')[0].replaceAll(',', '');
            }

            const labels = [...terminatingInstruction.matchAll(this.labelReference)].map(m => m[1]);
            switch (terminator) {
                case 'ret':
                case 'unreachable': {
                    break;
                }
                case 'br': {
                    //   br label %16, !dbg !41
                    //   br i1 %13, label %59, label %14, !dbg !41
                    //   br i1 true, label %bb1, label %bb4
                    if (labels.length === 1) {
                        addEdge(bb.nameId, labels[0], 'blue');
                    } else if (labels.length === 2 || labels.length === 3) {
                        // With three references the first one is the `%cond` value; with two the condition is a
                        // constant. Either way the targets are the last two references.
                        const [ifTrue, ifFalse] = labels.slice(-2);
                        addEdge(bb.nameId, ifTrue, 'green');
                        addEdge(bb.nameId, ifFalse, 'red');
                    } else {
                        SentryCapture(terminatingInstruction, 'makeLlvmEdges unexpected br');
                        assert(false);
                    }
                    break;
                }
                case 'switch':
                case 'indirectbr': {
                    //   switch i32 %val, label %default [ i32 0, label %onzero i32 1, label %onone i32 2, label %ontwo ]
                    //   indirectbr ptr %Addr, [ label %bb1, label %bb2, label %bb3 ]
                    // The first reference is the operand being switched / jumped on, not a target.
                    for (const label of labels.slice(1)) {
                        addEdge(bb.nameId, label, 'blue');
                    }
                    break;
                }
                case 'invoke': {
                    //   %retval = invoke i32 @Test(i32 15) to label %Continue unwind label %TestCleanup
                    // Arguments may add further `%` references, so the targets are taken from the end.
                    addEdge(bb.nameId, labels[labels.length - 2], 'green');
                    addEdge(bb.nameId, labels[labels.length - 1], 'grey');
                    break;
                }
                case 'callbr': {
                    //   callbr void asm "", "r,!i"(i32 %x) to label %fallthrough [label %indirect]
                    // Only look after `to label` so that `%` references among the arguments are ignored.
                    const callbrLabelsPart = terminatingInstruction.slice(
                        terminatingInstruction.lastIndexOf('to label'),
                    );
                    const callbrLabels = [...callbrLabelsPart.matchAll(this.labelReference)].map(m => m[1]);
                    addEdge(bb.nameId, callbrLabels[0], 'grey');
                    for (const label of callbrLabels.slice(1)) {
                        addEdge(bb.nameId, label, 'blue');
                    }
                    break;
                }
                case 'resume': {
                    // TODO: Landing pads?
                    break;
                }
                case 'catchswitch': {
                    //   %cs2 = catchswitch within %parenthandler [label %handler0] unwind label %cleanup
                    // TODO
                    break;
                }
                case 'catchret': {
                    //   catchret from %catch to label %continue
                    // TODO
                    break;
                }
                case 'cleanupret': {
                    //   cleanupret from %cleanup unwind label %continue
                    // TODO
                    break;
                }
                default: {
                    throw new Error(`Unexpected basic block terminator: ${terminatingInstruction}`);
                }
            }
        }
        return edges;
    }

    /**
     * Builds the nodes and edges of one function's control-flow graph.
     *
     * @param code the full listing
     * @param fn range of the function as produced by `splitToFunctions`
     */
    override generateFunctionCfg(code: AssemblyLine[], fn: Range) {
        const basicBlocks = this.splitToLlvmBasicBlocks(code, fn);
        return {
            nodes: this.makeLlvmNodes(code, basicBlocks),
            edges: this.makeLlvmEdges(code, basicBlocks),
        };
    }

    /**
     * Extracts the function name from the `define` line at `fn.start`.
     *
     * @param code the full listing
     * @param fn range of the function as produced by `splitToFunctions`
     * @returns the name as captured by `functionDefinition`, including any surrounding quotes
     */
    override getFnName(code: AssemblyLine[], fn: Range) {
        const match = code[fn.start].text.match(this.functionDefinition);
        return unwrap(match)[1];
    }
}
|