Add CFG for MSVC compilers (#8134)

This commit is contained in:
Ofek
2025-09-25 00:23:36 +03:00
committed by GitHub
parent df5a83ea5b
commit 5a7b0c27a4
6 changed files with 579 additions and 18 deletions

View File

@@ -28,3 +28,4 @@ export {GccCFGParser} from './gcc.js';
export {LlvmIrCfgParser} from './llvm-ir.js';
export {OatCFGParser} from './oat.js';
export {PythonCFGParser} from './python.js';
export {VcCFGParser} from './vc.js';

View File

@@ -82,16 +82,16 @@ export class BaseCFGParser {
public splitToFunctions(asmArr: AssemblyLine[]): Range[] {
if (asmArr.length === 0) return [];
const result: Range[] = [];
let first = 1;
let cur = 1;
const last = asmArr.length;
const fnRange: Range = {start: 0, end: 0};
while (first !== last) {
if (this.isFunctionEnd(asmArr[first].text)) {
fnRange.end = first;
while (cur !== last) {
if (this.isFunctionEnd(asmArr[cur].text)) {
fnRange.end = cur;
if (fnRange.end > fnRange.start + 1) result.push(_.clone(fnRange));
fnRange.start = first;
fnRange.start = cur;
}
++first;
++cur;
}
fnRange.end = last;
@@ -207,13 +207,19 @@ export class BaseCFGParser {
};
const generateName = (name: string, suffix: number) => {
const pos = name.indexOf('@');
if (pos === -1) return `${name}@${suffix}`;
const pos = name.indexOf(this.getLabelSeparator());
if (pos === -1) return `${name + this.getLabelSeparator() + suffix}`;
return name.substring(0, pos + 1) + suffix;
};
const bb = arrBB[bbIdx];
return hasName(asmArr, bb) ? asmArr[bb.end].text : generateName(bb.nameId, bb.end);
if (hasName(asmArr, bb)) return asmArr[bb.end].text;
const newBbName = generateName(bb.nameId, bb.end);
arrBB[bbIdx + 1].nameId = newBbName;
return newBbName;
}
/**
 * Character inserted between a block name and the numeric suffix when a
 * name is synthesised for a basic block.
 *
 * Kept as a method rather than a literal so that a parser can substitute a
 * different character.
 *
 * @returns the separator string, `'@'` for this parser.
 */
protected getLabelSeparator(): string {
    return '@';
}
protected splitToCanonicalBasicBlock(basicBlock: BBRange): CanonicalBB[] {
@@ -234,7 +240,11 @@ export class BaseCFGParser {
if (actPosSz === 1)
return [
{nameId: basicBlock.nameId, start: basicBlock.start, end: actionPos[0] + 1},
{nameId: basicBlock.nameId + '@' + (actionPos[0] + 1), start: actionPos[0] + 1, end: basicBlock.end},
{
nameId: basicBlock.nameId + this.getLabelSeparator() + (actionPos[0] + 1),
start: actionPos[0] + 1,
end: basicBlock.end,
},
];
let cur = 0;
@@ -244,14 +254,18 @@ export class BaseCFGParser {
const result: CanonicalBB[] = [];
result.push(_.clone(tmp));
while (cur !== last - 1) {
tmp.nameId = blockName + '@' + (actionPos[cur] + 1);
tmp.nameId = blockName + this.getLabelSeparator() + (actionPos[cur] + 1);
tmp.start = actionPos[cur] + 1;
++cur;
tmp.end = actionPos[cur] + 1;
result.push(_.clone(tmp));
}
tmp = {nameId: blockName + '@' + (actionPos[cur] + 1), start: actionPos[cur] + 1, end: basicBlock.end};
tmp = {
nameId: blockName + this.getLabelSeparator() + (actionPos[cur] + 1),
start: actionPos[cur] + 1,
end: basicBlock.end,
};
result.push(_.clone(tmp));
return result;

106
lib/cfg/cfg-parsers/vc.ts Normal file
View File

@@ -0,0 +1,106 @@
// Copyright (c) 2025, Compiler Explorer Authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
import _ from 'underscore';
import {AssemblyLine, BaseCFGParser, Range} from './base.js';
/**
 * Control-flow-graph parser for MSVC-style assembly listings, where each
 * function is delimited by a `<name> PROC` line and a `<name> ENDP` line and
 * labels start with `$`.
 */
export class VcCFGParser extends BaseCFGParser {
    /** Key under which this parser is looked up. */
    static override get key() {
        return 'vc';
    }

    /**
     * Strips comments and keeps only the lines that belong to a function.
     *
     * @param assembly the raw assembly lines.
     * @returns a new array holding, for every function, the `PROC` line, the
     *     `ENDP` line and everything in between; lines outside any
     *     `PROC`/`ENDP` pair are dropped.
     */
    override filterData(assembly: AssemblyLine[]): AssemblyLine[] {
        // Cut each line at its first ';' (and trim the trailing whitespace that
        // leaves behind). Lines without a ';' are passed through untouched.
        const removeComment = (line: AssemblyLine) => {
            const pos = line.text.indexOf(';');
            let newText = line.text;
            if (pos !== -1) {
                newText = line.text.substring(0, pos).trimEnd();
            }
            return {...line, text: newText};
        };
        // Comment-only lines are empty after stripping, so they disappear here.
        const noCommentLines = assembly.map(removeComment).filter(line => line.text.length > 0);

        const isFuncStart = (line: string) => {
            return line.endsWith(' PROC');
        };
        const isFuncEnd = (line: string) => {
            return line.endsWith(' ENDP');
        };

        // Keep only lines between a line that ends with ' PROC' and a line that
        // ends with ' ENDP', both inclusive.
        const newRes: AssemblyLine[] = [];
        let inFunction = false;
        for (const line of noCommentLines) {
            if (isFuncStart(line.text)) {
                inFunction = true;
            }
            if (inFunction) {
                newRes.push(line);
            }
            // Checked after the push so that the ENDP line itself is retained.
            if (isFuncEnd(line.text)) {
                inFunction = false;
            }
        }
        return newRes;
    }

    /**
     * Splits the filtered listing into one range per function.
     *
     * Each range starts at the first line of a function and its `end` is the
     * index just after that function's `ENDP` line.
     *
     * @param asmArr the assembly lines to split.
     * @returns the function ranges in order of appearance; empty when the
     *     input is empty or consists of a single line.
     */
    override splitToFunctions(asmArr: AssemblyLine[]): Range[] {
        if (asmArr.length === 0) return [];
        const result: Range[] = [];
        const last = asmArr.length;
        const fnRange: Range = {start: 0, end: 0};
        // Scanning starts at index 1. The condition is tested *before* the body
        // runs so that a one-line input never reads past the end of the array.
        for (let cur = 1; cur < last; ++cur) {
            if (this.isFunctionEnd(asmArr[cur].text)) {
                fnRange.end = cur + 1;
                result.push(_.clone(fnRange));
                fnRange.start = cur + 1;
            }
        }
        // Whatever follows the last ENDP is emitted as a final range, but only
        // if it spans more than one line.
        fnRange.end = last;
        if (fnRange.end > fnRange.start + 1) result.push(_.clone(fnRange));
        return result;
    }

    /**
     * @param x the text of one assembly line.
     * @returns `true` when the line ends with `ENDP`.
     */
    override isFunctionEnd(x: string) {
        return x.endsWith('ENDP');
    }

    /**
     * Decides whether `inst` closes the current basic block.
     *
     * @param inst the text of the line being examined.
     * @param prevInst the text of the preceding line; not used by this parser.
     * @returns `true` when `inst` begins with `$`, `false` otherwise.
     */
    override isBasicBlockEnd(inst: string, prevInst: string) {
        // Keep ENDP line in the same block as prevInst. Might drop it entirely.
        if (this.isFunctionEnd(inst)) return false;
        return inst[0] === '$';
    }

    /**
     * Derives the label a jump instruction targets.
     *
     * @param inst the text of a jump instruction.
     * @returns everything from the first `$` in `inst` to the end of the
     *     line, followed by `:`. When `inst` contains no `$` the match is
     *     `null` and the result is the string `'null:'`.
     */
    override extractJmpTargetName(inst: string) {
        return inst.match(/\$.*/) + ':';
    }

    /** @returns the separator used when composing generated block names. */
    override getLabelSeparator() {
        // `@` is used natively by MSVC labels, so we use `#` instead
        return '#';
    }
}

View File

@@ -25,7 +25,7 @@
import {CompilationResult} from '../../types/compilation/compilation.interfaces.js';
import type {CompilerInfo} from '../../types/compiler.interfaces.js';
import {AssemblyLine, Edge, getParserByKey, Node} from './cfg-parsers/index.js';
import {AssemblyLine, BaseCFGParser, Edge, getParserByKey, Node} from './cfg-parsers/index.js';
import {OatCFGParser} from './cfg-parsers/oat.js';
import {getInstructionSetByKey} from './instruction-sets/index.js';
@@ -66,9 +66,10 @@ export async function generateStructure(
// dex2oat is a special case because it can output different instruction
// sets. Create an OAT parser instead of searching by ISA.
const parser = compilerGroup?.includes('dex2oat')
? new OatCFGParser(instructionSet)
: new (getParserByKey(compilerGroup))(instructionSet);
let parser: BaseCFGParser;
if (compilerGroup?.includes('dex2oat')) parser = new OatCFGParser(instructionSet);
else if (compilerGroup?.startsWith('vc')) parser = new (getParserByKey('vc'))(instructionSet);
else parser = new (getParserByKey(compilerGroup))(instructionSet);
let code = parser.filterData(asmArr);
code = await parser.processFuncNames(code, fullRes);

426
test/cfg-cases/cfg-msvc.json generated Normal file
View File

@@ -0,0 +1,426 @@
{
"asm": [
{
"text": "int do_some_work(int) PROC",
"source": null
},
{
"text": " mov DWORD PTR [rsp+8], ecx",
"source": {
"file": null,
"line": 4
}
},
{
"text": " cmp DWORD PTR x$[rsp], 6",
"source": {
"file": null,
"line": 5
}
},
{
"text": " jle SHORT $LN2@do_some_wo",
"source": {
"file": null,
"line": 5
}
},
{
"text": " jmp SHORT $LN3@do_some_wo",
"source": {
"file": null,
"line": 6
}
},
{
"text": " jmp SHORT $endlabel$5",
"source": {
"file": null,
"line": 6
}
},
{
"text": "$LN2@do_some_wo:",
"source": null
},
{
"text": " mov eax, DWORD PTR x$[rsp]",
"source": {
"file": null,
"line": 7
}
},
{
"text": " add eax, 8",
"source": {
"file": null,
"line": 7
}
},
{
"text": " jmp SHORT $LN1@do_some_wo",
"source": {
"file": null,
"line": 7
}
},
{
"text": "$LN3@do_some_wo:",
"source": null
},
{
"text": "$endlabel$5:",
"source": null
},
{
"text": " mov eax, DWORD PTR x$[rsp]",
"source": {
"file": null,
"line": 9
}
},
{
"text": "$LN1@do_some_wo:",
"source": null
},
{
"text": " ret 0",
"source": {
"file": null,
"line": 10
}
},
{
"text": "int do_some_work(int) ENDP",
"source": null
},
{
"text": "void process_items(std::vector<int,std::allocator<int> > const &) PROC",
"source": null
},
{
"text": "$LN6:",
"source": null
},
{
"text": " mov QWORD PTR [rsp+8], rcx",
"source": {
"file": null,
"line": 12
}
},
{
"text": " sub rsp, 56",
"source": {
"file": null,
"line": 12
}
},
{
"text": " mov rcx, QWORD PTR vec$[rsp]",
"source": {
"file": null,
"line": 13
}
},
{
"text": " call unsigned __int64 std::vector<int,std::allocator<int> >::size(void)const",
"source": {
"file": null,
"line": 13
}
},
{
"text": " mov QWORD PTR sz$[rsp], rax",
"source": {
"file": null,
"line": 13
}
},
{
"text": " mov QWORD PTR i$1[rsp], 0",
"source": {
"file": null,
"line": 14
}
},
{
"text": " jmp SHORT $LN4@process_it",
"source": {
"file": null,
"line": 14
}
},
{
"text": "$LN2@process_it:",
"source": null
},
{
"text": " mov rax, QWORD PTR i$1[rsp]",
"source": {
"file": null,
"line": 14
}
},
{
"text": " inc rax",
"source": {
"file": null,
"line": 14
}
},
{
"text": " mov QWORD PTR i$1[rsp], rax",
"source": {
"file": null,
"line": 14
}
},
{
"text": "$LN4@process_it:",
"source": null
},
{
"text": " mov rax, QWORD PTR sz$[rsp]",
"source": {
"file": null,
"line": 14
}
},
{
"text": " cmp QWORD PTR i$1[rsp], rax",
"source": {
"file": null,
"line": 14
}
},
{
"text": " jae SHORT $LN3@process_it",
"source": {
"file": null,
"line": 14
}
},
{
"text": " mov rdx, QWORD PTR i$1[rsp]",
"source": {
"file": null,
"line": 15
}
},
{
"text": " mov rcx, QWORD PTR vec$[rsp]",
"source": {
"file": null,
"line": 15
}
},
{
"text": " call int const & std::vector<int,std::allocator<int> >::operator[](unsigned __int64)const",
"source": {
"file": null,
"line": 15
}
},
{
"text": " mov ecx, DWORD PTR [rax]",
"source": {
"file": null,
"line": 15
}
},
{
"text": " call int do_some_work(int)",
"source": {
"file": null,
"line": 15
}
},
{
"text": " npad 1",
"source": {
"file": null,
"line": 15
}
},
{
"text": " jmp SHORT $LN2@process_it",
"source": {
"file": null,
"line": 16
}
},
{
"text": "$LN3@process_it:",
"source": null
},
{
"text": " add rsp, 56",
"source": {
"file": null,
"line": 17
}
},
{
"text": " ret 0",
"source": {
"file": null,
"line": 17
}
},
{
"text": "void process_items(std::vector<int,std::allocator<int> > const &) ENDP",
"source": null
}
],
"cfg": {
"int do_some_work(int) PROC": {
"nodes": [
{
"id": "int do_some_work(int) PROC",
"label": "int do_some_work(int) PROC:\n mov DWORD PTR [rsp+8], ecx\n cmp DWORD PTR x$[rsp], 6\n jle SHORT $LN2@do_some_wo"
},
{
"id": "int do_some_work(int) PROC#4",
"label": "int do_some_work(int) PROC#4:\n jmp SHORT $LN3@do_some_wo"
},
{
"id": "int do_some_work(int) PROC#5",
"label": "int do_some_work(int) PROC#5:\n jmp SHORT $endlabel$5"
},
{
"id": "$LN2@do_some_wo:",
"label": "$LN2@do_some_wo:\n mov eax, DWORD PTR x$[rsp]\n add eax, 8\n jmp SHORT $LN1@do_some_wo"
},
{
"id": "$LN3@do_some_wo:",
"label": "$LN3@do_some_wo:\n"
},
{
"id": "$endlabel$5:",
"label": "$endlabel$5:\n mov eax, DWORD PTR x$[rsp]"
},
{
"id": "$LN1@do_some_wo:",
"label": "$LN1@do_some_wo:\n ret 0\nint do_some_work(int) ENDP"
}
],
"edges": [
{
"from": "int do_some_work(int) PROC",
"to": "$LN2@do_some_wo:",
"arrows": "to",
"color": "green"
},
{
"from": "int do_some_work(int) PROC",
"to": "int do_some_work(int) PROC#4",
"arrows": "to",
"color": "red"
},
{
"from": "int do_some_work(int) PROC#4",
"to": "$LN3@do_some_wo:",
"arrows": "to",
"color": "blue"
},
{
"from": "int do_some_work(int) PROC#5",
"to": "$endlabel$5:",
"arrows": "to",
"color": "blue"
},
{
"from": "$LN2@do_some_wo:",
"to": "$LN1@do_some_wo:",
"arrows": "to",
"color": "blue"
},
{
"from": "$LN3@do_some_wo:",
"to": "$endlabel$5:",
"arrows": "to",
"color": "grey"
},
{
"from": "$endlabel$5:",
"to": "$LN1@do_some_wo:",
"arrows": "to",
"color": "grey"
},
{
"from": "$LN1@do_some_wo:",
"to": "void process_items(std::vector<int,std::allocator<int> > const &) PROC",
"arrows": "to",
"color": "grey"
}
]
},
"void process_items(std::vector<int,std::allocator<int> > const &) PROC": {
"nodes": [
{
"id": "void process_items(std::vector<int,std::allocator<int> > const &) PROC",
"label": "void process_items(std::vector<int,std::allocator<int> > const &) PROC\n"
},
{
"id": "$LN6:",
"label": "$LN6:\n mov QWORD PTR [rsp+8], rcx\n sub rsp, 56\n mov rcx, QWORD PTR vec$[rsp]\n call unsigned __int64 std::vector<int,std::allocator<int> >::size(void)const\n mov QWORD PTR sz$[rsp], rax\n mov QWORD PTR i$1[rsp], 0\n jmp SHORT $LN4@process_it"
},
{
"id": "$LN2@process_it:",
"label": "$LN2@process_it:\n mov rax, QWORD PTR i$1[rsp]\n inc rax\n mov QWORD PTR i$1[rsp], rax"
},
{
"id": "$LN4@process_it:",
"label": "$LN4@process_it:\n mov rax, QWORD PTR sz$[rsp]\n cmp QWORD PTR i$1[rsp], rax\n jae SHORT $LN3@process_it"
},
{
"id": "$LN4@process_it:#33",
"label": "$LN4@process_it:#33\n mov rdx, QWORD PTR i$1[rsp]\n mov rcx, QWORD PTR vec$[rsp]\n call int const & std::vector<int,std::allocator<int> >::operator[](unsigned __int64)const\n mov ecx, DWORD PTR [rax]\n call int do_some_work(int)\n npad 1\n jmp SHORT $LN2@process_it"
},
{
"id": "$LN3@process_it:",
"label": "$LN3@process_it:\n add rsp, 56\n ret 0\nvoid process_items(std::vector<int,std::allocator<int> > const &) ENDP"
}
],
"edges": [
{
"from": "void process_items(std::vector<int,std::allocator<int> > const &) PROC",
"to": "$LN6:",
"arrows": "to",
"color": "grey"
},
{
"from": "$LN6:",
"to": "$LN4@process_it:",
"arrows": "to",
"color": "blue"
},
{
"from": "$LN2@process_it:",
"to": "$LN4@process_it:",
"arrows": "to",
"color": "grey"
},
{
"from": "$LN4@process_it:",
"to": "$LN3@process_it:",
"arrows": "to",
"color": "green"
},
{
"from": "$LN4@process_it:",
"to": "$LN4@process_it:#33",
"arrows": "to",
"color": "red"
},
{
"from": "$LN4@process_it:#33",
"to": "$LN2@process_it:",
"arrows": "to",
"color": "blue"
}
]
}
}
}

View File

@@ -33,7 +33,7 @@ import {generateStructure} from '../lib/cfg/cfg.js';
import {CompilerInfo} from '../types/compiler.interfaces.js';
import {makeFakeCompilerInfo, resolvePathFromTestRoot} from './utils.js';
async function DoCfgTest(cfgArg, filename, isLlvmIr = false, compilerInfo?: CompilerInfo) {
async function DoCfgTest(cfgArg: string, filename: string, isLlvmIr = false, compilerInfo?: CompilerInfo) {
const contents = JSON.parse(await fs.readFile(filename, 'utf8'));
if (!compilerInfo) {
compilerInfo = makeFakeCompilerInfo({
@@ -69,6 +69,19 @@ describe('Cfg test cases', () => {
}
});
describe('msvc', () => {
    // One fake compiler description is shared by every MSVC fixture below.
    const msvcCompilerInfo = makeFakeCompilerInfo({group: 'vc', version: 'vc2022', compilerType: 'vc'});
    // Every fixture whose file name mentions "msvc" becomes its own test case.
    const msvcFixtures = files.filter(name => name.includes('msvc'));

    for (const filename of msvcFixtures) {
        it(filename, async () => {
            const fixturePath = path.join(testcasespath, filename);
            await DoCfgTest('vc', fixturePath, false, msvcCompilerInfo);
        });
    }
});
describe('llvmir', () => {
for (const filename of files.filter(x => x.includes('llvmir'))) {
it(filename, async () => {