diff --git a/lib/cfg/cfg-parsers/_all.ts b/lib/cfg/cfg-parsers/_all.ts index 8250cfdba..17d1bdf38 100644 --- a/lib/cfg/cfg-parsers/_all.ts +++ b/lib/cfg/cfg-parsers/_all.ts @@ -28,3 +28,4 @@ export {GccCFGParser} from './gcc.js'; export {LlvmIrCfgParser} from './llvm-ir.js'; export {OatCFGParser} from './oat.js'; export {PythonCFGParser} from './python.js'; +export {VcCFGParser} from './vc.js'; diff --git a/lib/cfg/cfg-parsers/base.ts b/lib/cfg/cfg-parsers/base.ts index e65e86f31..2c6db1fd1 100644 --- a/lib/cfg/cfg-parsers/base.ts +++ b/lib/cfg/cfg-parsers/base.ts @@ -82,16 +82,16 @@ export class BaseCFGParser { public splitToFunctions(asmArr: AssemblyLine[]): Range[] { if (asmArr.length === 0) return []; const result: Range[] = []; - let first = 1; + let cur = 1; const last = asmArr.length; const fnRange: Range = {start: 0, end: 0}; - while (first !== last) { - if (this.isFunctionEnd(asmArr[first].text)) { - fnRange.end = first; + while (cur !== last) { + if (this.isFunctionEnd(asmArr[cur].text)) { + fnRange.end = cur; if (fnRange.end > fnRange.start + 1) result.push(_.clone(fnRange)); - fnRange.start = first; + fnRange.start = cur; } - ++first; + ++cur; } fnRange.end = last; @@ -207,13 +207,19 @@ export class BaseCFGParser { }; const generateName = (name: string, suffix: number) => { - const pos = name.indexOf('@'); - if (pos === -1) return `${name}@${suffix}`; - + const pos = name.indexOf(this.getLabelSeparator()); + if (pos === -1) return `${name + this.getLabelSeparator() + suffix}`; return name.substring(0, pos + 1) + suffix; }; const bb = arrBB[bbIdx]; - return hasName(asmArr, bb) ? 
asmArr[bb.end].text : generateName(bb.nameId, bb.end); + if (hasName(asmArr, bb)) return asmArr[bb.end].text; + const newBbName = generateName(bb.nameId, bb.end); + arrBB[bbIdx + 1].nameId = newBbName; + return newBbName; + } + + protected getLabelSeparator() { + return '@'; } protected splitToCanonicalBasicBlock(basicBlock: BBRange): CanonicalBB[] { @@ -234,7 +240,11 @@ export class BaseCFGParser { if (actPosSz === 1) return [ {nameId: basicBlock.nameId, start: basicBlock.start, end: actionPos[0] + 1}, - {nameId: basicBlock.nameId + '@' + (actionPos[0] + 1), start: actionPos[0] + 1, end: basicBlock.end}, + { + nameId: basicBlock.nameId + this.getLabelSeparator() + (actionPos[0] + 1), + start: actionPos[0] + 1, + end: basicBlock.end, + }, ]; let cur = 0; @@ -244,14 +254,18 @@ export class BaseCFGParser { const result: CanonicalBB[] = []; result.push(_.clone(tmp)); while (cur !== last - 1) { - tmp.nameId = blockName + '@' + (actionPos[cur] + 1); + tmp.nameId = blockName + this.getLabelSeparator() + (actionPos[cur] + 1); tmp.start = actionPos[cur] + 1; ++cur; tmp.end = actionPos[cur] + 1; result.push(_.clone(tmp)); } - tmp = {nameId: blockName + '@' + (actionPos[cur] + 1), start: actionPos[cur] + 1, end: basicBlock.end}; + tmp = { + nameId: blockName + this.getLabelSeparator() + (actionPos[cur] + 1), + start: actionPos[cur] + 1, + end: basicBlock.end, + }; result.push(_.clone(tmp)); return result; diff --git a/lib/cfg/cfg-parsers/vc.ts b/lib/cfg/cfg-parsers/vc.ts new file mode 100644 index 000000000..08abd1530 --- /dev/null +++ b/lib/cfg/cfg-parsers/vc.ts @@ -0,0 +1,106 @@ +// Copyright (c) 2025, Compiler Explorer Authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
//     * Redistributions in binary form must reproduce the above copyright
//       notice, this list of conditions and the following disclaimer in the
//       documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.

import _ from 'underscore';

import {AssemblyLine, BaseCFGParser, Range} from './base.js';

/**
 * CFG parser for MSVC-style assembly listings, where a function is delimited by
 * a `<name> PROC` line and a `<name> ENDP` line and labels start with `$`.
 */
export class VcCFGParser extends BaseCFGParser {
    /** Identifier of this parser (`'vc'`). */
    static override get key() {
        return 'vc';
    }

    /**
     * Reduces a listing to the lines that belong to functions.
     *
     * Every line is first truncated at its first `;` (so both whole-line and
     * trailing comments are removed; trailing whitespace before the `;` is
     * trimmed) and lines that end up empty are dropped. Of the remaining lines,
     * only those from a line ending in ` PROC` through the next line ending in
     * ` ENDP` (both inclusive) are kept.
     *
     * @param assembly - the raw listing; it is not mutated
     * @returns new line objects (copies with the comment-free `text`), in input order
     */
    override filterData(assembly: AssemblyLine[]): AssemblyLine[] {
        const stripComment = (text: string) => {
            const pos = text.indexOf(';');
            return pos === -1 ? text : text.substring(0, pos).trimEnd();
        };

        const kept: AssemblyLine[] = [];
        let inFunction = false;
        for (const line of assembly) {
            const text = stripComment(line.text);
            if (text.length === 0) continue;

            // The PROC line itself opens the function and is kept...
            if (text.endsWith(' PROC')) inFunction = true;
            if (inFunction) kept.push({...line, text});
            // ...and the ENDP line is kept before the function is closed.
            if (text.endsWith(' ENDP')) inFunction = false;
        }
        return kept;
    }

    /**
     * Splits the listing into one range per function.
     *
     * A range is closed right after each line accepted by `isFunctionEnd`, so
     * `end` is one past the `ENDP` line and the next range starts there. Lines
     * left over after the last `ENDP` form a final range only if there is more
     * than one of them.
     *
     * @param asmArr - the listing to split
     * @returns the function ranges in listing order; empty for an empty listing
     */
    override splitToFunctions(asmArr: AssemblyLine[]): Range[] {
        const last = asmArr.length;
        if (last === 0) return [];

        const result: Range[] = [];
        const fnRange: Range = {start: 0, end: 0};
        // Index 0 is never tested (presumably it is a PROC line). A plain `for`
        // keeps a one-line listing from reading past the end of the array, which
        // the previous `do ... while` form did.
        for (let cur = 1; cur < last; ++cur) {
            if (this.isFunctionEnd(asmArr[cur].text)) {
                fnRange.end = cur + 1;
                result.push(_.clone(fnRange));
                fnRange.start = cur + 1;
            }
        }

        fnRange.end = last;
        if (fnRange.end > fnRange.start + 1) result.push(_.clone(fnRange));
        return result;
    }

    /**
     * Returns true when the line ends with `ENDP`.
     *
     * NOTE(review): `filterData` requires a space before `ENDP` while this check
     * does not; confirm whether the looser match is intended.
     */
    override isFunctionEnd(x: string) {
        return x.endsWith('ENDP');
    }

    /**
     * Returns true when `inst` begins with `$`, i.e. it is a label line such as
     * `$LN2@do_some_wo:`. `prevInst` is not consulted.
     */
    override isBasicBlockEnd(inst: string, prevInst: string) {
        // Keep ENDP line in the same block as prevInst. Might drop it entirely.
        if (this.isFunctionEnd(inst)) return false;
        return inst[0] === '$';
    }

    /**
     * Extracts the jump target from an instruction: everything from the first
     * `$` to the end of the line, with `:` appended, e.g.
     * `jmp SHORT $LN3@do_some_wo` gives `$LN3@do_some_wo:`.
     */
    override extractJmpTargetName(inst: string) {
        const match = /\$.*/.exec(inst);
        // NOTE(review): an instruction without `$` yields the literal "null:",
        // exactly as the earlier `inst.match(...) + ':'` did. How callers treat a
        // missing target is not visible here, so the value is preserved, not changed.
        return `${match === null ? 'null' : match[0]}:`;
    }

    /** Separator placed between a block name and its generated numeric suffix. */
    override getLabelSeparator() {
        // `@` is used natively by MSVC labels, so we use `#` instead
        return '#';
    }
}
new OatCFGParser(instructionSet) - : new (getParserByKey(compilerGroup))(instructionSet); + let parser: BaseCFGParser; + if (compilerGroup?.includes('dex2oat')) parser = new OatCFGParser(instructionSet); + else if (compilerGroup?.startsWith('vc')) parser = new (getParserByKey('vc'))(instructionSet); + else parser = new (getParserByKey(compilerGroup))(instructionSet); let code = parser.filterData(asmArr); code = await parser.processFuncNames(code, fullRes); diff --git a/test/cfg-cases/cfg-msvc.json b/test/cfg-cases/cfg-msvc.json new file mode 100644 index 000000000..21bc0b568 --- /dev/null +++ b/test/cfg-cases/cfg-msvc.json @@ -0,0 +1,426 @@ +{ + "asm": [ + { + "text": "int do_some_work(int) PROC", + "source": null + }, + { + "text": " mov DWORD PTR [rsp+8], ecx", + "source": { + "file": null, + "line": 4 + } + }, + { + "text": " cmp DWORD PTR x$[rsp], 6", + "source": { + "file": null, + "line": 5 + } + }, + { + "text": " jle SHORT $LN2@do_some_wo", + "source": { + "file": null, + "line": 5 + } + }, + { + "text": " jmp SHORT $LN3@do_some_wo", + "source": { + "file": null, + "line": 6 + } + }, + { + "text": " jmp SHORT $endlabel$5", + "source": { + "file": null, + "line": 6 + } + }, + { + "text": "$LN2@do_some_wo:", + "source": null + }, + { + "text": " mov eax, DWORD PTR x$[rsp]", + "source": { + "file": null, + "line": 7 + } + }, + { + "text": " add eax, 8", + "source": { + "file": null, + "line": 7 + } + }, + { + "text": " jmp SHORT $LN1@do_some_wo", + "source": { + "file": null, + "line": 7 + } + }, + { + "text": "$LN3@do_some_wo:", + "source": null + }, + { + "text": "$endlabel$5:", + "source": null + }, + { + "text": " mov eax, DWORD PTR x$[rsp]", + "source": { + "file": null, + "line": 9 + } + }, + { + "text": "$LN1@do_some_wo:", + "source": null + }, + { + "text": " ret 0", + "source": { + "file": null, + "line": 10 + } + }, + { + "text": "int do_some_work(int) ENDP", + "source": null + }, + { + "text": "void process_items(std::vector > const &) PROC", + 
"source": null + }, + { + "text": "$LN6:", + "source": null + }, + { + "text": " mov QWORD PTR [rsp+8], rcx", + "source": { + "file": null, + "line": 12 + } + }, + { + "text": " sub rsp, 56", + "source": { + "file": null, + "line": 12 + } + }, + { + "text": " mov rcx, QWORD PTR vec$[rsp]", + "source": { + "file": null, + "line": 13 + } + }, + { + "text": " call unsigned __int64 std::vector >::size(void)const", + "source": { + "file": null, + "line": 13 + } + }, + { + "text": " mov QWORD PTR sz$[rsp], rax", + "source": { + "file": null, + "line": 13 + } + }, + { + "text": " mov QWORD PTR i$1[rsp], 0", + "source": { + "file": null, + "line": 14 + } + }, + { + "text": " jmp SHORT $LN4@process_it", + "source": { + "file": null, + "line": 14 + } + }, + { + "text": "$LN2@process_it:", + "source": null + }, + { + "text": " mov rax, QWORD PTR i$1[rsp]", + "source": { + "file": null, + "line": 14 + } + }, + { + "text": " inc rax", + "source": { + "file": null, + "line": 14 + } + }, + { + "text": " mov QWORD PTR i$1[rsp], rax", + "source": { + "file": null, + "line": 14 + } + }, + { + "text": "$LN4@process_it:", + "source": null + }, + { + "text": " mov rax, QWORD PTR sz$[rsp]", + "source": { + "file": null, + "line": 14 + } + }, + { + "text": " cmp QWORD PTR i$1[rsp], rax", + "source": { + "file": null, + "line": 14 + } + }, + { + "text": " jae SHORT $LN3@process_it", + "source": { + "file": null, + "line": 14 + } + }, + { + "text": " mov rdx, QWORD PTR i$1[rsp]", + "source": { + "file": null, + "line": 15 + } + }, + { + "text": " mov rcx, QWORD PTR vec$[rsp]", + "source": { + "file": null, + "line": 15 + } + }, + { + "text": " call int const & std::vector >::operator[](unsigned __int64)const", + "source": { + "file": null, + "line": 15 + } + }, + { + "text": " mov ecx, DWORD PTR [rax]", + "source": { + "file": null, + "line": 15 + } + }, + { + "text": " call int do_some_work(int)", + "source": { + "file": null, + "line": 15 + } + }, + { + "text": " npad 1", + "source": { + 
"file": null, + "line": 15 + } + }, + { + "text": " jmp SHORT $LN2@process_it", + "source": { + "file": null, + "line": 16 + } + }, + { + "text": "$LN3@process_it:", + "source": null + }, + { + "text": " add rsp, 56", + "source": { + "file": null, + "line": 17 + } + }, + { + "text": " ret 0", + "source": { + "file": null, + "line": 17 + } + }, + { + "text": "void process_items(std::vector > const &) ENDP", + "source": null + } + ], + "cfg": { + "int do_some_work(int) PROC": { + "nodes": [ + { + "id": "int do_some_work(int) PROC", + "label": "int do_some_work(int) PROC:\n mov DWORD PTR [rsp+8], ecx\n cmp DWORD PTR x$[rsp], 6\n jle SHORT $LN2@do_some_wo" + }, + { + "id": "int do_some_work(int) PROC#4", + "label": "int do_some_work(int) PROC#4:\n jmp SHORT $LN3@do_some_wo" + }, + { + "id": "int do_some_work(int) PROC#5", + "label": "int do_some_work(int) PROC#5:\n jmp SHORT $endlabel$5" + }, + { + "id": "$LN2@do_some_wo:", + "label": "$LN2@do_some_wo:\n mov eax, DWORD PTR x$[rsp]\n add eax, 8\n jmp SHORT $LN1@do_some_wo" + }, + { + "id": "$LN3@do_some_wo:", + "label": "$LN3@do_some_wo:\n" + }, + { + "id": "$endlabel$5:", + "label": "$endlabel$5:\n mov eax, DWORD PTR x$[rsp]" + }, + { + "id": "$LN1@do_some_wo:", + "label": "$LN1@do_some_wo:\n ret 0\nint do_some_work(int) ENDP" + } + ], + "edges": [ + { + "from": "int do_some_work(int) PROC", + "to": "$LN2@do_some_wo:", + "arrows": "to", + "color": "green" + }, + { + "from": "int do_some_work(int) PROC", + "to": "int do_some_work(int) PROC#4", + "arrows": "to", + "color": "red" + }, + { + "from": "int do_some_work(int) PROC#4", + "to": "$LN3@do_some_wo:", + "arrows": "to", + "color": "blue" + }, + { + "from": "int do_some_work(int) PROC#5", + "to": "$endlabel$5:", + "arrows": "to", + "color": "blue" + }, + { + "from": "$LN2@do_some_wo:", + "to": "$LN1@do_some_wo:", + "arrows": "to", + "color": "blue" + }, + { + "from": "$LN3@do_some_wo:", + "to": "$endlabel$5:", + "arrows": "to", + "color": "grey" + }, + { + "from": 
"$endlabel$5:", + "to": "$LN1@do_some_wo:", + "arrows": "to", + "color": "grey" + }, + { + "from": "$LN1@do_some_wo:", + "to": "void process_items(std::vector > const &) PROC", + "arrows": "to", + "color": "grey" + } + ] + }, + "void process_items(std::vector > const &) PROC": { + "nodes": [ + { + "id": "void process_items(std::vector > const &) PROC", + "label": "void process_items(std::vector > const &) PROC\n" + }, + { + "id": "$LN6:", + "label": "$LN6:\n mov QWORD PTR [rsp+8], rcx\n sub rsp, 56\n mov rcx, QWORD PTR vec$[rsp]\n call unsigned __int64 std::vector >::size(void)const\n mov QWORD PTR sz$[rsp], rax\n mov QWORD PTR i$1[rsp], 0\n jmp SHORT $LN4@process_it" + }, + { + "id": "$LN2@process_it:", + "label": "$LN2@process_it:\n mov rax, QWORD PTR i$1[rsp]\n inc rax\n mov QWORD PTR i$1[rsp], rax" + }, + { + "id": "$LN4@process_it:", + "label": "$LN4@process_it:\n mov rax, QWORD PTR sz$[rsp]\n cmp QWORD PTR i$1[rsp], rax\n jae SHORT $LN3@process_it" + }, + { + "id": "$LN4@process_it:#33", + "label": "$LN4@process_it:#33\n mov rdx, QWORD PTR i$1[rsp]\n mov rcx, QWORD PTR vec$[rsp]\n call int const & std::vector >::operator[](unsigned __int64)const\n mov ecx, DWORD PTR [rax]\n call int do_some_work(int)\n npad 1\n jmp SHORT $LN2@process_it" + }, + { + "id": "$LN3@process_it:", + "label": "$LN3@process_it:\n add rsp, 56\n ret 0\nvoid process_items(std::vector > const &) ENDP" + } + ], + "edges": [ + { + "from": "void process_items(std::vector > const &) PROC", + "to": "$LN6:", + "arrows": "to", + "color": "grey" + }, + { + "from": "$LN6:", + "to": "$LN4@process_it:", + "arrows": "to", + "color": "blue" + }, + { + "from": "$LN2@process_it:", + "to": "$LN4@process_it:", + "arrows": "to", + "color": "grey" + }, + { + "from": "$LN4@process_it:", + "to": "$LN3@process_it:", + "arrows": "to", + "color": "green" + }, + { + "from": "$LN4@process_it:", + "to": "$LN4@process_it:#33", + "arrows": "to", + "color": "red" + }, + { + "from": "$LN4@process_it:#33", + "to": 
"$LN2@process_it:", + "arrows": "to", + "color": "blue" + } + ] + } + } +} \ No newline at end of file diff --git a/test/cfg-tests.ts b/test/cfg-tests.ts index 57bb35cf3..d5c2fa62b 100644 --- a/test/cfg-tests.ts +++ b/test/cfg-tests.ts @@ -33,7 +33,7 @@ import {generateStructure} from '../lib/cfg/cfg.js'; import {CompilerInfo} from '../types/compiler.interfaces.js'; import {makeFakeCompilerInfo, resolvePathFromTestRoot} from './utils.js'; -async function DoCfgTest(cfgArg, filename, isLlvmIr = false, compilerInfo?: CompilerInfo) { +async function DoCfgTest(cfgArg: string, filename: string, isLlvmIr = false, compilerInfo?: CompilerInfo) { const contents = JSON.parse(await fs.readFile(filename, 'utf8')); if (!compilerInfo) { compilerInfo = makeFakeCompilerInfo({ @@ -69,6 +69,19 @@ describe('Cfg test cases', () => { } }); + describe('msvc', () => { + const msvcCompilerInfo = makeFakeCompilerInfo({ + group: 'vc', + version: 'vc2022', + compilerType: 'vc', + }); + for (const filename of files.filter(x => x.includes('msvc'))) { + it(filename, async () => { + await DoCfgTest('vc', path.join(testcasespath, filename), false, msvcCompilerInfo); + }); + } + }); + describe('llvmir', () => { for (const filename of files.filter(x => x.includes('llvmir'))) { it(filename, async () => {