CFG for Python (#7855)

This commit is contained in:
Ofek
2025-07-01 21:49:20 +03:00
committed by GitHub
parent e2bbbc16ef
commit 1b1055d2cb
11 changed files with 1077 additions and 41 deletions

View File

@@ -27,3 +27,4 @@ export {ClangCFGParser} from './clang.js';
export {GccCFGParser} from './gcc.js';
export {LlvmIrCfgParser} from './llvm-ir.js';
export {OatCFGParser} from './oat.js';
export {PythonCFGParser} from './python.js';

View File

@@ -98,6 +98,15 @@ export class BaseCFGParser {
return result;
}
/**
 * Returns the id given to a basic block, derived from the text of the line
 * that opens it. The base implementation uses that text unchanged;
 * subclasses may override it to extract a shorter id.
 */
protected getBbId(firstInst: string): string {
return firstInst;
}
/**
 * Returns the index at which a new basic block starts, given the index of
 * the line that terminated the previous block. The base implementation
 * skips that line, because it is expected to be a label rather than an
 * instruction.
 */
protected getBbFirstInstIdx(firstLine: number) {
//inst is expected to be .L*: where * in 1,2,...
return firstLine + 1;
}
protected splitToBasicBlocks(asmArr: AssemblyLine[], range: Range) {
let first = range.start;
const last = range.end;
@@ -117,11 +126,11 @@ export class BaseCFGParser {
while (first < last) {
const inst = asmArr[first].text;
if (this.isBasicBlockEnd(inst, asmArr[first - 1] ? asmArr[first - 1].text : '')) {
const prevInst = asmArr[first - 1] ? asmArr[first - 1].text : '';
if (this.isBasicBlockEnd(inst, prevInst)) {
rangeBb.end = first;
result.push(_.clone(rangeBb));
//inst is expected to be .L*: where * in 1,2,...
rangeBb = newRangeWith(rangeBb, inst, first + 1);
rangeBb = newRangeWith(rangeBb, this.getBbId(inst), this.getBbFirstInstIdx(first));
} else if (this.instructionSetInfo.isJmpInstruction(inst)) {
rangeBb.actionPos.push(first);
}
@@ -178,10 +187,30 @@ export class BaseCFGParser {
return inst[0] === '.' || prevInst.includes(' ret');
}
protected extractNodeName(inst: string) {
protected extractJmpTargetName(inst: string) {
return inst.match(/\.L\d+/) + ':';
}
/**
 * Maps the text of a line to the id of the node it belongs to.
 * The base implementation returns the text unchanged; subclasses may
 * override it when node ids are derived from the line instead.
 */
protected extractNodeIdFromInst(inst: string) {
return inst;
}
/**
 * Names the "branch not taken" successor of the canonical basic block at
 * `bbIdx`.
 *
 * When the line right after the block is itself a basic-block boundary, its
 * text is the name. Otherwise a name is synthesised from the block's own
 * name: everything up to and including the first `@` (or the whole name plus
 * `@` when there is none) followed by the block's end index.
 */
protected extractAltJmpTargetName(asmArr: AssemblyLine[], bbIdx: number, arrBB: CanonicalBB[]): string {
    const {nameId, end} = arrBB[bbIdx];

    // A boundary line directly after the block already carries the name.
    const following = asmArr[end];
    if (following && this.isBasicBlockEnd(following.text, '')) {
        return following.text;
    }

    // No such line: derive `<stem>@<end>` from the current block's name.
    const atPos = nameId.indexOf('@');
    const stem = atPos === -1 ? `${nameId}@` : nameId.substring(0, atPos + 1);
    return stem + end;
}
protected splitToCanonicalBasicBlock(basicBlock: BBRange): CanonicalBB[] {
const actionPos = basicBlock.actionPos;
let actPosSz = actionPos.length;
@@ -247,38 +276,26 @@ export class BaseCFGParser {
color: color,
});
const hasName = (asmArr: AssemblyLine[], cbb: CanonicalBB) => {
const asm = asmArr[cbb.end];
return asm ? this.isBasicBlockEnd(asm.text, '') : false;
};
const generateName = (name: string, suffix: number) => {
const pos = name.indexOf('@');
if (pos === -1) return `${name}@${suffix}`;
return name.substring(0, pos + 1) + suffix;
};
/* note: x.end-1 possible values:
jmp .L*, {jne,je,jg,...} .L*, ret/rep ret, call and any other instruction that doesn't change control flow
*/
for (const x of arrOfCanonicalBasicBlock) {
let targetNode;
for (const [i, x] of arrOfCanonicalBasicBlock.entries()) {
const lastInst = asmArr[x.end - 1].text;
switch (this.instructionSetInfo.getInstructionType(lastInst)) {
case InstructionType.jmp: {
//we have to deal only with jmp destination, jmp instruction are always taken.
//edge from jump inst
targetNode = this.extractNodeName(lastInst);
const targetNode = this.extractJmpTargetName(lastInst);
edges.push(setEdge(x.nameId, targetNode, 'blue'));
break;
}
case InstructionType.conditionalJmpInst: {
//deal with : branch taken, branch not taken
targetNode = this.extractNodeName(lastInst);
edges.push(setEdge(x.nameId, targetNode, 'green'));
targetNode = hasName(asmArr, x) ? asmArr[x.end].text : generateName(x.nameId, x.end);
edges.push(setEdge(x.nameId, targetNode, 'red'));
const targetNode1 = this.extractJmpTargetName(lastInst);
edges.push(setEdge(x.nameId, targetNode1, 'green'));
const targetNode2 = this.extractAltJmpTargetName(asmArr, i, arrOfCanonicalBasicBlock);
edges.push(setEdge(x.nameId, targetNode2, 'red'));
break;
}
case InstructionType.notRetInst: {
@@ -286,7 +303,7 @@ export class BaseCFGParser {
//note : asmArr[x.end] expected to be .L*:(name of a basic block)
// this .L*: has to be exactly after the last instruction in the current canonical basic block
if (asmArr[x.end]) {
targetNode = asmArr[x.end].text;
const targetNode = this.extractNodeIdFromInst(asmArr[x.end].text);
edges.push(setEdge(x.nameId, targetNode, 'grey'));
}
break;

View File

@@ -49,7 +49,7 @@ export class ClangCFGParser extends BaseCFGParser {
return this.filterTextSection(assembly).map(_.clone).filter(isCode).map(removeComments);
}
override extractNodeName(inst: string) {
override extractJmpTargetName(inst: string) {
return inst.match(/\.LBB\d+_\d+/) + ':';
}
}

View File

@@ -40,7 +40,7 @@ export class GccCFGParser extends BaseCFGParser {
return this.filterTextSection(assembly).map(_.clone).filter(isCode);
}
override extractNodeName(inst: string) {
override extractJmpTargetName(inst: string) {
return inst.match(/\.L\d+/) + ':';
}
}

View File

@@ -167,13 +167,13 @@ export class OatCFGParser extends BaseCFGParser {
if (this.isBasicBlockEnd(inst, asmArr[first - 1] ? asmArr[first - 1].text : '')) {
rangeBb.end = first;
result.push(_.clone(rangeBb));
rangeBb = newRangeWith(rangeBb, this.extractNodeName(inst), first + 1);
rangeBb = newRangeWith(rangeBb, this.extractJmpTargetName(inst), first + 1);
} else if (this.instructionSetInfo.isJmpInstruction(opcode)) {
rangeBb.actionPos.push(first);
} else if (this.isJmpTarget(inst, jmpAddrs)) {
rangeBb.end = first;
result.push(_.clone(rangeBb));
rangeBb = newRangeWith(rangeBb, this.extractNodeName(inst), first);
rangeBb = newRangeWith(rangeBb, this.extractJmpTargetName(inst), first);
}
++first;
}
@@ -189,7 +189,7 @@ export class OatCFGParser extends BaseCFGParser {
}
// All nodes are named after the address of their first instruction.
override extractNodeName(inst: string) {
override extractJmpTargetName(inst: string) {
return this.shortenHex(this.getPc(inst));
}
@@ -214,7 +214,7 @@ export class OatCFGParser extends BaseCFGParser {
return [
{nameId: basicBlock.nameId, start: basicBlock.start, end: actionPos[0] + 1},
{
nameId: this.extractNodeName(this.code[actionPos[0] + 1].text),
nameId: this.extractJmpTargetName(this.code[actionPos[0] + 1].text),
start: actionPos[0] + 1,
end: basicBlock.end,
},
@@ -227,7 +227,7 @@ export class OatCFGParser extends BaseCFGParser {
const result: CanonicalBB[] = [];
result.push(_.clone(tmp));
while (first !== last - 1) {
tmp.nameId = this.extractNodeName(this.code[actionPos[first] + 1].text);
tmp.nameId = this.extractJmpTargetName(this.code[actionPos[first] + 1].text);
tmp.start = actionPos[first] + 1;
++first;
tmp.end = actionPos[first] + 1;
@@ -235,7 +235,7 @@ export class OatCFGParser extends BaseCFGParser {
}
tmp = {
nameId: this.extractNodeName(this.code[actionPos[first] + 1].text),
nameId: this.extractJmpTargetName(this.code[actionPos[first] + 1].text),
start: actionPos[first] + 1,
end: basicBlock.end,
};
@@ -269,7 +269,7 @@ export class OatCFGParser extends BaseCFGParser {
targetNode = this.shortenHex(this.getJmpAddr(lastInst));
edges.push(setEdge(x.nameId, targetNode, 'green'));
// Branch not taken
targetNode = this.extractNodeName(asmArr[x.end].text);
targetNode = this.extractJmpTargetName(asmArr[x.end].text);
edges.push(setEdge(x.nameId, targetNode, 'red'));
break;
}
@@ -277,7 +277,7 @@ export class OatCFGParser extends BaseCFGParser {
// No jmp, but the next instruction is in a different basic
// block because it is the target of another jmp.
if (asmArr[x.end]) {
targetNode = this.extractNodeName(asmArr[x.end].text);
targetNode = this.extractJmpTargetName(asmArr[x.end].text);
edges.push(setEdge(x.nameId, targetNode, 'grey'));
}
break;

View File

@@ -0,0 +1,100 @@
// Copyright (c) 2025, Compiler Explorer Authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
import {ResultLine} from '../../../types/resultline/resultline.interfaces.js';
import {InstructionType} from '../instruction-sets/base.js';
import {AssemblyLine, BaseCFGParser, CanonicalBB} from './base.js';
/**
 * CFG parser for CPython bytecode listings in the layout printed by the
 * `dis` module, e.g.
 *
 *     '  6     >>   22 LOAD_FAST                0 (num)'
 *
 * i.e. an optional source line number, an optional `>>` jump-target marker,
 * the bytecode offset, the opcode name and its operand. Basic blocks are
 * identified by the bytecode offset of their first instruction.
 */
export class PythonCFGParser extends BaseCFGParser {
    // Matches the `>>` jump-target marker only where `dis` prints it: after the
    // optional source line number and before the bytecode offset. Operands may
    // legitimately contain `>>` too (e.g. `BINARY_OP 9 (>>)`), and those must
    // not be mistaken for jump targets.
    private static readonly jumpTargetMarker = /^\s*(?:\d+\s+)?>>\s+\d+/;

    static override get key() {
        return 'python3';
    }

    /** A `Disassembly of ...` header line is treated as the end of a function. */
    override isFunctionEnd(x: string) {
        return x.startsWith('Disassembly of');
    }

    /**
     * Tells whether `inst` opens a new basic block: either it is marked as a
     * jump target, or the previous instruction was a jump or a return.
     */
    override isBasicBlockEnd(inst: string, prevInst: string) {
        if (PythonCFGParser.jumpTargetMarker.test(inst))
            // jmp target
            return true;
        // Probably applicable to non-python CFGs too:
        return this.instructionSetInfo.getInstructionType(prevInst) !== InstructionType.notRetInst;
    }

    /**
     * Drops the module-level prefix that precedes the first function, drops
     * empty lines, and renames every `Disassembly of ...` header to
     * `Function #<idx>`.
     *
     * Renamed headers are emitted as copies so the caller's lines are never
     * modified; all other lines are passed through as-is.
     */
    override filterData(bytecode: ResultLine[]) {
        const res: ResultLine[] = [];

        // Skip everything before the first function header. Headers that were
        // already renamed to `Function #<idx>` are recognised as well.
        let i = 0;
        while (
            i < bytecode.length &&
            !bytecode[i].text.startsWith('Disassembly of') &&
            !bytecode[i].text.startsWith('Function #')
        ) {
            i++;
        }

        let funcIdx = 0;
        for (let j = i; j < bytecode.length; j++) {
            const line = bytecode[j];
            if (!line.text) continue;
            if (line.text.startsWith('Disassembly of')) {
                // Copy instead of assigning to line.text: the input belongs to the caller.
                res.push({...line, text: `Function #${funcIdx++}`});
            } else {
                res.push(line);
            }
        }
        return res;
    }

    // 10 POP_JUMP_IF_FALSE 5 (to 22) ===> captures "22"
    // Returns '' when the line has no trailing "(to <offset>)" annotation.
    override extractJmpTargetName(inst: string) {
        const candidateName = inst.match(/\(to (\d+)\)\s*$/);
        return candidateName ? candidateName[1] : '';
    }

    //'  6     >>   22 LOAD_FAST                0 (num):' ==> '22'
    //'  4          12 LOAD_FAST                0 (num):' ==> '12'
    //'        >>  140 FOR_ITER                98 (to 340)' ==> 140
    // Returns '' when no bytecode offset can be found in the line.
    override getBbId(firstInst: string): string {
        return firstInst.match(/^\s*(\d+)?\s+>?>?\s+(\d+)/)?.[2] ?? '';
    }

    // Unlike the label-based base parser, the line that opens a block is itself
    // an instruction, so the block starts on that very line.
    override getBbFirstInstIdx(firstLine: number) {
        return firstLine;
    }

    /** Node ids are bytecode offsets, so the id of a line is its block id. */
    override extractNodeIdFromInst(inst: string) {
        return this.getBbId(inst);
    }

    /**
     * The "branch not taken" successor is the canonical block that follows the
     * one at `bbIdx`; it is named after the offset of its first instruction.
     * Returns '' when there is no following block or its first line is missing.
     */
    override extractAltJmpTargetName(asmArr: AssemblyLine[], bbIdx: number, arrBB: CanonicalBB[]): string {
        const nextBbStart = arrBB[bbIdx + 1]?.start;
        if (nextBbStart === undefined) return '';
        return this.getBbId(asmArr[nextBbStart]?.text ?? '');
    }
}

View File

@@ -25,3 +25,4 @@
export {ArmInstructionSetInfo} from './arm.js';
export {BaseInstructionSetInfo} from './base.js';
export {LlvmIrInstructionSetInfo} from './llvm-ir.js';
export {PythonInstructionSetInfo} from './python.js';

View File

@@ -0,0 +1,58 @@
// Copyright (c) 2025, Compiler Explorer Authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
import {InstructionSet} from '../../../types/instructionsets.js';
import {BaseInstructionSetInfo, InstructionType} from './base.js';
/**
 * Instruction classification for CPython bytecode listings in the layout
 * printed by the `dis` module.
 *
 * Classification is done on the opcode name only. The operand column can
 * contain arbitrary identifiers and constants (e.g. `LOAD_GLOBAL 0 (SEND_ALL)`
 * or `LOAD_CONST 1 ('JUMP')`), and those must not make a line look like a
 * jump or a return.
 */
export class PythonInstructionSetInfo extends BaseInstructionSetInfo {
    static override get key(): InstructionSet {
        return 'python';
    }

    // Jump opcodes obtained on python 3.12 via:
    // import dis
    // import opcode
    // print([opcode.opname[op] for op in dis.hasjump])
    static conditionalJumps = new RegExp(
        ['POP_JUMP_IF_FALSE', 'POP_JUMP_IF_NONE', 'POP_JUMP_IF_NOT_NONE', 'POP_JUMP_IF_TRUE'].join('|'),
    );
    static unconditionalJumps = new RegExp(
        ['JUMP_BACKWARD', 'JUMP_BACKWARD_NO_INTERRUPT', 'JUMP_FORWARD', 'JUMP', 'JUMP_NO_INTERRUPT'].join('|'),
    );
    static returnInstruction = new RegExp(
        ['RETURN_VALUE', 'RETURN_CONST', 'RETURN_GENERATOR', 'YIELD_VALUE', 'SEND'].join('|'),
    );

    // Captures the opcode name of a listing line: optional source line number,
    // optional `>>` jump-target marker, bytecode offset, then the opcode name.
    private static readonly opnamePattern = /^\s*(?:\d+\s+)?(?:>>\s+)?\d+\s+([A-Z][A-Z0-9_]*)/;

    /**
     * Returns the opcode name of a listing line. When the text is not in the
     * expected layout (for instance when only an opcode name is passed in),
     * the text is returned unchanged so it is still matched as a whole.
     */
    private static extractOpname(instruction: string): string {
        return PythonInstructionSetInfo.opnamePattern.exec(instruction)?.[1] ?? instruction;
    }

    /** True when the opcode name of the line contains `JUMP`. */
    override isJmpInstruction(instruction: string) {
        return PythonInstructionSetInfo.extractOpname(instruction).includes('JUMP');
    }

    /**
     * Classifies a listing line as conditional jump, unconditional jump,
     * return, or (the default) an instruction that does none of these.
     * Conditional jumps are checked first because their names also match the
     * unconditional-jump pattern.
     */
    override getInstructionType(instruction: string) {
        const opname = PythonInstructionSetInfo.extractOpname(instruction);
        if (PythonInstructionSetInfo.conditionalJumps.test(opname)) return InstructionType.conditionalJmpInst;
        if (PythonInstructionSetInfo.unconditionalJumps.test(opname)) return InstructionType.jmp;
        if (PythonInstructionSetInfo.returnInstruction.test(opname)) return InstructionType.retInst;
        return InstructionType.notRetInst;
    }
}

197
test/cfg-cases/cfg-python.basic.json generated Normal file
View File

@@ -0,0 +1,197 @@
{
"asm": [
{
"text": " 0 0 RESUME 0",
"source": {
"line": 0,
"file": null
}
},
{
"text": "",
"source": {
"line": null,
"file": null
}
},
{
"text": " 1 2 LOAD_CONST 0 (<code object square at 0x55a2b8e95ff0, file \"example.py\", line 1>)",
"source": {
"line": 1,
"file": null
}
},
{
"text": " 4 MAKE_FUNCTION 0",
"source": {
"line": 1,
"file": null
}
},
{
"text": " 6 STORE_NAME 0 (square)",
"source": {
"line": 1,
"file": null
}
},
{
"text": " 8 RETURN_CONST 1 (None)",
"source": {
"line": 1,
"file": null
}
},
{
"text": "",
"source": {
"line": null,
"file": null
}
},
{
"text": "Disassembly of <code object square at 0x55a2b8e95ff0, file \"example.py\", line 1>:",
"source": {
"line": null,
"file": null
}
},
{
"text": " 1 0 RESUME 0",
"source": {
"line": 1,
"file": null
}
},
{
"text": "",
"source": {
"line": null,
"file": null
}
},
{
"text": " 3 2 LOAD_FAST 0 (num)",
"source": {
"line": 3,
"file": null
}
},
{
"text": " 4 LOAD_CONST 1 (5)",
"source": {
"line": 3,
"file": null
}
},
{
"text": " 6 COMPARE_OP 68 (>)",
"source": {
"line": 3,
"file": null
}
},
{
"text": " 10 POP_JUMP_IF_FALSE 5 (to 22)",
"source": {
"line": 3,
"file": null
}
},
{
"text": "",
"source": {
"line": null,
"file": null
}
},
{
"text": " 4 12 LOAD_FAST 0 (num)",
"source": {
"line": 4,
"file": null
}
},
{
"text": " 14 LOAD_FAST 0 (num)",
"source": {
"line": 4,
"file": null
}
},
{
"text": " 16 BINARY_OP 5 (*)",
"source": {
"line": 4,
"file": null
}
},
{
"text": " 20 RETURN_VALUE",
"source": {
"line": 4,
"file": null
}
},
{
"text": "",
"source": {
"line": null,
"file": null
}
},
{
"text": " 6 >> 22 LOAD_FAST 0 (num)",
"source": {
"line": 6,
"file": null
}
},
{
"text": " 24 RETURN_VALUE",
"source": {
"line": 6,
"file": null
}
},
{
"text": "",
"source": {
"line": null,
"file": null
}
}
],
"cfg": {
"Function #0": {
"nodes": [
{
"id": "Function #0",
"label": "Function #0:\n 1 0 RESUME 0\n 3 2 LOAD_FAST 0 (num)\n 4 LOAD_CONST 1 (5)\n 6 COMPARE_OP 68 (>)\n 10 POP_JUMP_IF_FALSE 5 (to 22)"
},
{
"id": "12",
"label": "12:\n 4 12 LOAD_FAST 0 (num)\n 14 LOAD_FAST 0 (num)\n 16 BINARY_OP 5 (*)\n 20 RETURN_VALUE"
},
{
"id": "22",
"label": "22:\n 6 >> 22 LOAD_FAST 0 (num)\n 24 RETURN_VALUE"
}
],
"edges": [
{
"from": "Function #0",
"to": "22",
"arrows": "to",
"color": "green"
},
{
"from": "Function #0",
"to": "12",
"arrows": "to",
"color": "red"
}
]
}
}
}

647
test/cfg-cases/cfg-python.basic2.json generated Normal file
View File

@@ -0,0 +1,647 @@
{
"asm": [
{
"text": "Function #0",
"source": {
"line": null,
"file": null
}
},
{
"text": " 2 0 RESUME 0",
"source": {
"line": 2,
"file": null
}
},
{
"text": " 3 2 LOAD_GLOBAL 0 (os)",
"source": {
"line": 3,
"file": null
}
},
{
"text": " 12 LOAD_ATTR 2 (path)",
"source": {
"line": 3,
"file": null
}
},
{
"text": " 32 LOAD_ATTR 5 (NULL|self + exists)",
"source": {
"line": 3,
"file": null
}
},
{
"text": " 52 LOAD_CONST 1 ('power/.complete-precache')",
"source": {
"line": 3,
"file": null
}
},
{
"text": " 54 CALL 1",
"source": {
"line": 3,
"file": null
}
},
{
"text": " 62 POP_JUMP_IF_TRUE 10 (to 84)",
"source": {
"line": 3,
"file": null
}
},
{
"text": " 4 64 LOAD_GLOBAL 7 (NULL + precache)",
"source": {
"line": 4,
"file": null
}
},
{
"text": " 74 CALL 0",
"source": {
"line": 4,
"file": null
}
},
{
"text": " 82 POP_TOP",
"source": {
"line": 4,
"file": null
}
},
{
"text": " 6 >> 84 LOAD_GLOBAL 9 (NULL + Path)",
"source": {
"line": 6,
"file": null
}
},
{
"text": " 94 LOAD_CONST 2 ('power')",
"source": {
"line": 6,
"file": null
}
},
{
"text": " 96 CALL 1",
"source": {
"line": 6,
"file": null
}
},
{
"text": " 104 STORE_FAST 0 (path)",
"source": {
"line": 6,
"file": null
}
},
{
"text": " 8 106 LOAD_FAST 0 (path)",
"source": {
"line": 8,
"file": null
}
},
{
"text": " 108 LOAD_ATTR 11 (NULL|self + glob)",
"source": {
"line": 8,
"file": null
}
},
{
"text": " 128 LOAD_CONST 3 ('*.html')",
"source": {
"line": 8,
"file": null
}
},
{
"text": " 130 CALL 1",
"source": {
"line": 8,
"file": null
}
},
{
"text": " 138 GET_ITER",
"source": {
"line": 8,
"file": null
}
},
{
"text": " >> 140 FOR_ITER 98 (to 340)",
"source": {
"line": 8,
"file": null
}
},
{
"text": " 144 STORE_FAST 1 (page)",
"source": {
"line": 8,
"file": null
}
},
{
"text": " 9 146 LOAD_FAST 1 (page)",
"source": {
"line": 9,
"file": null
}
},
{
"text": " 148 LOAD_ATTR 13 (NULL|self + absolute)",
"source": {
"line": 9,
"file": null
}
},
{
"text": " 168 CALL 0",
"source": {
"line": 9,
"file": null
}
},
{
"text": " 176 LOAD_ATTR 15 (NULL|self + open)",
"source": {
"line": 9,
"file": null
}
},
{
"text": " 196 LOAD_CONST 4 ('r')",
"source": {
"line": 9,
"file": null
}
},
{
"text": " 198 CALL 1",
"source": {
"line": 9,
"file": null
}
},
{
"text": " 206 BEFORE_WITH",
"source": {
"line": 9,
"file": null
}
},
{
"text": " 208 STORE_FAST 2 (fp)",
"source": {
"line": 9,
"file": null
}
},
{
"text": " 10 210 LOAD_GLOBAL 17 (NULL + BeautifulSoup)",
"source": {
"line": 10,
"file": null
}
},
{
"text": " 220 LOAD_FAST 2 (fp)",
"source": {
"line": 10,
"file": null
}
},
{
"text": " 222 LOAD_ATTR 19 (NULL|self + read)",
"source": {
"line": 10,
"file": null
}
},
{
"text": " 242 CALL 0",
"source": {
"line": 10,
"file": null
}
},
{
"text": " 250 LOAD_CONST 5 ('html.parser')",
"source": {
"line": 10,
"file": null
}
},
{
"text": " 252 CALL 2",
"source": {
"line": 10,
"file": null
}
},
{
"text": " 260 STORE_FAST 3 (soup)",
"source": {
"line": 10,
"file": null
}
},
{
"text": " 11 262 LOAD_GLOBAL 21 (NULL + _cleanup)",
"source": {
"line": 11,
"file": null
}
},
{
"text": " 272 LOAD_FAST 3 (soup)",
"source": {
"line": 11,
"file": null
}
},
{
"text": " 274 CALL 1",
"source": {
"line": 11,
"file": null
}
},
{
"text": " 282 STORE_FAST 4 (clean_soup)",
"source": {
"line": 11,
"file": null
}
},
{
"text": " 12 284 LOAD_FAST 4 (clean_soup)",
"source": {
"line": 12,
"file": null
}
},
{
"text": " 286 GET_ITER",
"source": {
"line": 12,
"file": null
}
},
{
"text": " >> 288 FOR_ITER 13 (to 318)",
"source": {
"line": 12,
"file": null
}
},
{
"text": " 292 STORE_FAST 5 (tag)",
"source": {
"line": 12,
"file": null
}
},
{
"text": " 13 294 LOAD_GLOBAL 23 (NULL + print)",
"source": {
"line": 13,
"file": null
}
},
{
"text": " 304 LOAD_FAST 5 (tag)",
"source": {
"line": 13,
"file": null
}
},
{
"text": " 306 CALL 1",
"source": {
"line": 13,
"file": null
}
},
{
"text": " 314 POP_TOP",
"source": {
"line": 13,
"file": null
}
},
{
"text": " 316 JUMP_BACKWARD 15 (to 288)",
"source": {
"line": 13,
"file": null
}
},
{
"text": " 12 >> 318 END_FOR",
"source": {
"line": 12,
"file": null
}
},
{
"text": " 320 NOP",
"source": {
"line": 12,
"file": null
}
},
{
"text": " 9 322 LOAD_CONST 0 (None)",
"source": {
"line": 9,
"file": null
}
},
{
"text": " 324 LOAD_CONST 0 (None)",
"source": {
"line": 9,
"file": null
}
},
{
"text": " 326 LOAD_CONST 0 (None)",
"source": {
"line": 9,
"file": null
}
},
{
"text": " 328 CALL 2",
"source": {
"line": 9,
"file": null
}
},
{
"text": " 336 POP_TOP",
"source": {
"line": 9,
"file": null
}
},
{
"text": " 338 JUMP_BACKWARD 100 (to 140)",
"source": {
"line": 9,
"file": null
}
},
{
"text": " 8 >> 340 END_FOR",
"source": {
"line": 8,
"file": null
}
},
{
"text": " 342 RETURN_CONST 0 (None)",
"source": {
"line": 8,
"file": null
}
},
{
"text": " 9 >> 344 PUSH_EXC_INFO",
"source": {
"line": 9,
"file": null
}
},
{
"text": " 346 WITH_EXCEPT_START",
"source": {
"line": 9,
"file": null
}
},
{
"text": " 348 POP_JUMP_IF_TRUE 1 (to 352)",
"source": {
"line": 9,
"file": null
}
},
{
"text": " 350 RERAISE 2",
"source": {
"line": 9,
"file": null
}
},
{
"text": " >> 352 POP_TOP",
"source": {
"line": 9,
"file": null
}
},
{
"text": " 354 POP_EXCEPT",
"source": {
"line": 9,
"file": null
}
},
{
"text": " 356 POP_TOP",
"source": {
"line": 9,
"file": null
}
},
{
"text": " 358 POP_TOP",
"source": {
"line": 9,
"file": null
}
},
{
"text": " 360 JUMP_BACKWARD 111 (to 140)",
"source": {
"line": 9,
"file": null
}
},
{
"text": " >> 362 COPY 3",
"source": {
"line": 9,
"file": null
}
},
{
"text": " 364 POP_EXCEPT",
"source": {
"line": 9,
"file": null
}
},
{
"text": " 366 RERAISE 1",
"source": {
"line": 9,
"file": null
}
},
{
"text": "ExceptionTable:",
"source": {
"line": 9,
"file": null
}
},
{
"text": " 208 to 318 -> 344 [2] lasti",
"source": {
"line": 208,
"file": null
}
},
{
"text": " 344 to 352 -> 362 [4] lasti",
"source": {
"line": 344,
"file": null
}
}
],
"cfg": {
"Function #0": {
"nodes": [
{
"id": "Function #0",
"label": "Function #0:\n 2 0 RESUME 0\n 3 2 LOAD_GLOBAL 0 (os)\n 12 LOAD_ATTR 2 (path)\n 32 LOAD_ATTR 5 (NULL|self + exists)\n 52 LOAD_CONST 1 ('power/.complete-precache')\n 54 CALL 1\n 62 POP_JUMP_IF_TRUE 10 (to 84)"
},
{
"id": "64",
"label": "64:\n 4 64 LOAD_GLOBAL 7 (NULL + precache)\n 74 CALL 0\n 82 POP_TOP"
},
{
"id": "84",
"label": "84:\n 6 >> 84 LOAD_GLOBAL 9 (NULL + Path)\n 94 LOAD_CONST 2 ('power')\n 96 CALL 1\n 104 STORE_FAST 0 (path)\n 8 106 LOAD_FAST 0 (path)\n 108 LOAD_ATTR 11 (NULL|self + glob)\n 128 LOAD_CONST 3 ('*.html')\n 130 CALL 1\n 138 GET_ITER"
},
{
"id": "140",
"label": "140:\n >> 140 FOR_ITER 98 (to 340)\n 144 STORE_FAST 1 (page)\n 9 146 LOAD_FAST 1 (page)\n 148 LOAD_ATTR 13 (NULL|self + absolute)\n 168 CALL 0\n 176 LOAD_ATTR 15 (NULL|self + open)\n 196 LOAD_CONST 4 ('r')\n 198 CALL 1\n 206 BEFORE_WITH\n 208 STORE_FAST 2 (fp)\n 10 210 LOAD_GLOBAL 17 (NULL + BeautifulSoup)\n 220 LOAD_FAST 2 (fp)\n 222 LOAD_ATTR 19 (NULL|self + read)\n 242 CALL 0\n 250 LOAD_CONST 5 ('html.parser')\n 252 CALL 2\n 260 STORE_FAST 3 (soup)\n 11 262 LOAD_GLOBAL 21 (NULL + _cleanup)\n 272 LOAD_FAST 3 (soup)\n 274 CALL 1\n 282 STORE_FAST 4 (clean_soup)\n 12 284 LOAD_FAST 4 (clean_soup)\n 286 GET_ITER"
},
{
"id": "288",
"label": "288:\n >> 288 FOR_ITER 13 (to 318)\n 292 STORE_FAST 5 (tag)\n 13 294 LOAD_GLOBAL 23 (NULL + print)\n 304 LOAD_FAST 5 (tag)\n 306 CALL 1\n 314 POP_TOP\n 316 JUMP_BACKWARD 15 (to 288)"
},
{
"id": "318",
"label": "318:\n 12 >> 318 END_FOR\n 320 NOP\n 9 322 LOAD_CONST 0 (None)\n 324 LOAD_CONST 0 (None)\n 326 LOAD_CONST 0 (None)\n 328 CALL 2\n 336 POP_TOP\n 338 JUMP_BACKWARD 100 (to 140)"
},
{
"id": "340",
"label": "340:\n 8 >> 340 END_FOR\n 342 RETURN_CONST 0 (None)"
},
{
"id": "344",
"label": "344:\n 9 >> 344 PUSH_EXC_INFO\n 346 WITH_EXCEPT_START\n 348 POP_JUMP_IF_TRUE 1 (to 352)"
},
{
"id": "350",
"label": "350:\n 350 RERAISE 2"
},
{
"id": "352",
"label": "352:\n >> 352 POP_TOP\n 354 POP_EXCEPT\n 356 POP_TOP\n 358 POP_TOP\n 360 JUMP_BACKWARD 111 (to 140)"
},
{
"id": "362",
"label": "362:\n >> 362 COPY 3\n 364 POP_EXCEPT\n 366 RERAISE 1\nExceptionTable:\n 208 to 318 -> 344 [2] lasti\n 344 to 352 -> 362 [4] lasti"
}
],
"edges": [
{
"from": "Function #0",
"to": "84",
"arrows": "to",
"color": "green"
},
{
"from": "Function #0",
"to": "64",
"arrows": "to",
"color": "red"
},
{
"from": "64",
"to": "84",
"arrows": "to",
"color": "grey"
},
{
"from": "84",
"to": "140",
"arrows": "to",
"color": "grey"
},
{
"from": "140",
"to": "288",
"arrows": "to",
"color": "grey"
},
{
"from": "288",
"to": "288",
"arrows": "to",
"color": "blue"
},
{
"from": "318",
"to": "140",
"arrows": "to",
"color": "blue"
},
{
"from": "344",
"to": "352",
"arrows": "to",
"color": "green"
},
{
"from": "344",
"to": "350",
"arrows": "to",
"color": "red"
},
{
"from": "350",
"to": "352",
"arrows": "to",
"color": "grey"
},
{
"from": "352",
"to": "140",
"arrows": "to",
"color": "blue"
}
]
}
}
}

View File

@@ -28,20 +28,20 @@ import path from 'node:path';
import {describe, expect, it} from 'vitest';
import * as cfg from '../lib/cfg/cfg.js';
import {generateStructure} from '../lib/cfg/cfg.js';
import {CompilerInfo} from '../types/compiler.interfaces.js';
import {makeFakeCompilerInfo, resolvePathFromTestRoot} from './utils.js';
async function DoCfgTest(cfgArg, filename, isLlvmIr = false) {
async function DoCfgTest(cfgArg, filename, isLlvmIr = false, compilerInfo?: CompilerInfo) {
const contents = JSON.parse(await fs.readFile(filename, 'utf8'));
const structure = cfg.generateStructure(
makeFakeCompilerInfo({
if (!compilerInfo) {
compilerInfo = makeFakeCompilerInfo({
compilerType: '',
version: cfgArg,
}),
contents.asm,
isLlvmIr,
);
});
}
const structure = generateStructure(compilerInfo, contents.asm, isLlvmIr);
expect(structure).toEqual(contents.cfg);
}
@@ -76,4 +76,19 @@ describe('Cfg test cases', () => {
});
}
});
describe('python', () => {
const pythonCompilerInfo = makeFakeCompilerInfo({
instructionSet: 'python',
group: 'python3',
version: 'Python 3.12.1',
compilerType: 'python',
});
for (const filename of files.filter(x => x.includes('python'))) {
it(filename, async () => {
await DoCfgTest('python', path.join(testcasespath, filename), false, pythonCompilerInfo);
});
}
});
});