Add instruction set info for generating ARM control flow graphs (#5158)

Fixes #4410

On top of #5156
This commit is contained in:
Jeremy Rifkin
2023-06-19 22:13:15 -04:00
committed by GitHub
parent 51d4073912
commit 122a0047e8
10 changed files with 142 additions and 19 deletions

View File

@@ -22,6 +22,7 @@
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
export {BaseCFGParser} from './base.js';
export {ClangCFGParser} from './clang.js';
export {GccCFGParser} from './gcc.js';
export {LlvmIrCfgParser} from './llvm-ir.js';

View File

@@ -38,7 +38,7 @@ export class LlvmIrCfgParser extends BaseCFGParser {
labelReference: RegExp;
static override get key() {
return 'llvmir';
return 'llvm';
}
constructor(instructionSetInfo: BaseInstructionSetInfo) {

View File

@@ -51,8 +51,8 @@ export type CFG = {
export function generateStructure(compilerInfo: CompilerInfo, asmArr: AssemblyLine[], isLlvmIr: boolean) {
// figure out what we're working with
const isa = isLlvmIr ? 'llvmir' : compilerInfo.instructionSet;
const compilerGroup = isLlvmIr ? 'llvmir' : isLLVMBased(compilerInfo) ? 'clang' : compilerInfo.group;
const isa = isLlvmIr ? 'llvm' : compilerInfo.instructionSet;
const compilerGroup = isLlvmIr ? 'llvm' : isLLVMBased(compilerInfo) ? 'clang' : compilerInfo.group;
const instructionSet = new (getInstructionSetByKey(isa ?? 'base'))();
const parser = new (getParserByKey(compilerGroup))(instructionSet);

View File

@@ -22,4 +22,6 @@
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
export {ArmInstructionSetInfo} from './arm.js';
export {BaseInstructionSetInfo} from './base.js';
export {LlvmIrInstructionSetInfo} from './llvm-ir.js';

View File

@@ -0,0 +1,105 @@
// Copyright (c) 2023, Compiler Explorer Authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
import {InstructionSet} from '../../../types/instructionsets.js';
import {BaseInstructionSetInfo, InstructionType} from './base.js';
/**
 * Instruction classification for 32-bit ARM / Thumb and AArch64 assembly, used
 * when generating control flow graphs.
 *
 * Classified as conditional jumps:
 *   b<cc> label      (ARM / Thumb)
 *   b.<cc> label     (AArch64)
 *   bx<cc> reg, bxj<cc> reg
 *   cbz / cbnz
 * Classified as unconditional jumps:
 *   b label
 *   bxj reg
 * Classified as returns:
 *   ret              (with or without a register operand)
 *   bx reg
 *   pop {..., pc}
 *   mov pc, reg
 * Currently not handled:
 *   bl<cc> label, blx<cc> label, blx<cc> reg
 *   mov<cc> pc, reg and pop<cc> {..., pc}
 *   tbz / tbnz / br  (AArch64)
 *
 * Any jump mnemonic may carry a Thumb width qualifier (`.n` or `.w`).
 */
export class ArmInstructionSetInfo extends BaseInstructionSetInfo {
    /** Non-capturing alternation of every recognised condition-code suffix. */
    static conditions = `(?:${[
        'eq',
        'ne',
        'cs',
        'hs',
        'cc',
        'lo',
        'mi',
        'pl',
        'vs',
        'vc',
        'hi',
        'ls',
        'ge',
        'lt',
        'gt',
        'le',
        'al',
    ].join('|')})`;

    /** Optional Thumb width qualifier that may trail a branch mnemonic. */
    private static readonly widthQualifier = '(?:\\.[nw])?';

    /**
     * Builds a regex that matches a whole opcode (not a substring of it) against
     * any of the given mnemonic patterns, optionally followed by `.n` / `.w`.
     */
    private static opcodeRegex(mnemonics: string[]): RegExp {
        const alternatives = mnemonics.map(re => `(?:${re})`).join('|');
        return new RegExp(`^(?:${alternatives})${ArmInstructionSetInfo.widthQualifier}$`);
    }

    /** Matches the opcode of a conditional branch; `b\.?` also accepts the AArch64 `b.<cc>` spelling. */
    static conditionalJumps = ArmInstructionSetInfo.opcodeRegex([
        `b\\.?${ArmInstructionSetInfo.conditions}`,
        `bx${ArmInstructionSetInfo.conditions}`,
        `bxj${ArmInstructionSetInfo.conditions}`,
        `cbz`,
        `cbnz`,
    ]);

    /** Matches the opcode of an unconditional branch. */
    static unconditionalJumps = ArmInstructionSetInfo.opcodeRegex([`b`, `bx`, `bxj`]);

    /**
     * Matches a whole (trimmed, lower-cased) instruction that returns from the
     * function. Anchored at the start so a label operand that happens to be named
     * `ret` or `bx` cannot trigger it, and with no mandatory trailing text so a
     * bare `ret` and a plain `pop {r4, pc}` are recognised.
     */
    static returnInstruction = new RegExp(
        '^(?:' +
            [`(?:bx|ret)\\b`, `pop\\s*\\{(?:r(?:\\d{2,}|[4-9]),\\s*)*pc\\}`, `mov\\s*pc\\s*,`]
                .map(re => `(?:${re})`)
                .join('|') +
            ')',
    );

    static override get key(): InstructionSet[] {
        return ['arm32', 'aarch64'];
    }

    /** Extracts the lower-cased mnemonic: the first whitespace-delimited token (spaces or tabs). */
    private static opcodeOf(instruction: string): string {
        return (instruction.trim().split(/\s+/)[0] ?? '').toLowerCase();
    }

    /**
     * Reports whether the instruction is a branch of any kind.
     *
     * @param instruction - one line of assembly text
     * @returns true for every conditional or unconditional jump opcode listed on the class
     */
    override isJmpInstruction(instruction: string) {
        const opcode = ArmInstructionSetInfo.opcodeOf(instruction);
        return (
            ArmInstructionSetInfo.conditionalJumps.test(opcode) ||
            ArmInstructionSetInfo.unconditionalJumps.test(opcode)
        );
    }

    /**
     * Classifies an instruction as a conditional jump, a return, an unconditional
     * jump, or none of these.
     *
     * @param instruction - one line of assembly text
     * @returns the matching `InstructionType`, or `InstructionType.notRetInst` when nothing matches
     */
    override getInstructionType(instruction: string) {
        const text = instruction.trim().toLowerCase();
        const opcode = ArmInstructionSetInfo.opcodeOf(text);
        // Conditional forms are tested first so `b.eq` is never mistaken for a plain `b`.
        if (ArmInstructionSetInfo.conditionalJumps.test(opcode)) return InstructionType.conditionalJmpInst;
        // Returns are tested before unconditional jumps: `bx reg` is in both sets and
        // must be reported as a return rather than a jump to a label.
        if (ArmInstructionSetInfo.returnInstruction.test(text)) return InstructionType.retInst;
        if (ArmInstructionSetInfo.unconditionalJumps.test(opcode)) return InstructionType.jmp;
        return InstructionType.notRetInst;
    }
}

View File

@@ -22,6 +22,8 @@
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
import {InstructionSet} from '../../../types/instructionsets.js';
export enum InstructionType {
jmp,
conditionalJmpInst,
@@ -30,12 +32,12 @@ export enum InstructionType {
}
export class BaseInstructionSetInfo {
static get key() {
static get key(): 'base' | InstructionSet | InstructionSet[] {
return 'base';
}
isJmpInstruction(x: string) {
return x.trim()[0] === 'j' || x.match(/\bb\.*(eq|ne|cs|hs|cc|lo|hi|ls|ge|lt|gt|le|rge|rlt)?\b/);
return x.trim()[0] === 'j' || !!x.match(/\bb\.*(eq|ne|cs|hs|cc|lo|hi|ls|ge|lt|gt|le|rge|rlt)?\b/);
}
getInstructionType(inst: string) {

View File

@@ -22,14 +22,15 @@
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
import {InstructionSet} from '../../../types/instructionsets.js';
import {BaseInstructionSetInfo, InstructionType} from './base.js';
export class LlvmIrInstructionSetInfo extends BaseInstructionSetInfo {
static override get key() {
return 'llvmir';
static override get key(): InstructionSet {
return 'llvm';
}
override isJmpInstruction(x: string): null {
override isJmpInstruction(x: string): boolean {
throw Error('Not implemented');
}

View File

@@ -15,8 +15,16 @@ export class C3Compiler extends BaseCompiler {
}
override optionsForFilter(filters: ParseFiltersAndOutputOptions, outputFilename: string) {
return ['compile-only', '-g', '-l', 'pthread', '--no-strip-unused', '--no-obj', '--no-emit-stdlib',
'--emit-asm'];
return [
'compile-only',
'-g',
'-l',
'pthread',
'--no-strip-unused',
'--no-obj',
'--no-emit-stdlib',
'--emit-asm',
];
}
override getIrOutputFilename(inputFilename: string): string {

View File

@@ -23,5 +23,5 @@
// POSSIBILITY OF SUCH DAMAGE.
export interface Keyable {
get key(): string;
get key(): string | string[];
}

View File

@@ -32,20 +32,24 @@ function makeKeyMap<T extends Keyable>(typeName: string, objects: Record<string,
for (const name in objects) {
const type = objects[name];
const key = type.key;
const keys = type.key;
if (key === undefined) {
if (keys === undefined) {
logger.error(`${typeName} ${name} does not provide a key value`);
haveErrors = true;
} else if (!key) {
} else if (!keys) {
logger.error(`${typeName} ${name} provides empty key value`);
haveErrors = true;
} else if (keyToTypeMap[key] === undefined) {
keyToTypeMap[key] = type;
keyToNameMap[key] = name;
} else {
logger.error(`${typeName} ${name} key conflicts with ${keyToNameMap[key]}`);
haveErrors = true;
for (const key of keys instanceof Array ? keys : [keys]) {
if (keyToTypeMap[key] === undefined) {
keyToTypeMap[key] = type;
keyToNameMap[key] = name;
} else {
logger.error(`${typeName} ${name} key conflicts with ${keyToNameMap[key]}`);
haveErrors = true;
}
}
}
}