From 122a0047e881ddd0aef650b25645e624eb0f6249 Mon Sep 17 00:00:00 2001 From: Jeremy Rifkin <51220084+jeremy-rifkin@users.noreply.github.com> Date: Mon, 19 Jun 2023 22:13:15 -0400 Subject: [PATCH] Add instruction set info for generating arm control flow graphs (#5158) Fixes #4410 On top of #5156 --- lib/cfg/cfg-parsers/_all.ts | 1 + lib/cfg/cfg-parsers/llvm-ir.ts | 2 +- lib/cfg/cfg.ts | 4 +- lib/cfg/instruction-sets/_all.ts | 2 + lib/cfg/instruction-sets/arm.ts | 105 ++++++++++++++++++++++++++++ lib/cfg/instruction-sets/base.ts | 6 +- lib/cfg/instruction-sets/llvm-ir.ts | 7 +- lib/compilers/c3c.ts | 12 +++- lib/keyed-type.interfaces.ts | 2 +- lib/keyed-type.ts | 20 +++--- 10 files changed, 142 insertions(+), 19 deletions(-) create mode 100644 lib/cfg/instruction-sets/arm.ts diff --git a/lib/cfg/cfg-parsers/_all.ts b/lib/cfg/cfg-parsers/_all.ts index c390e2a1e..dc6bfda2f 100644 --- a/lib/cfg/cfg-parsers/_all.ts +++ b/lib/cfg/cfg-parsers/_all.ts @@ -22,6 +22,7 @@ // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE // POSSIBILITY OF SUCH DAMAGE. +export {BaseCFGParser} from './base.js'; export {ClangCFGParser} from './clang.js'; export {GccCFGParser} from './gcc.js'; export {LlvmIrCfgParser} from './llvm-ir.js'; diff --git a/lib/cfg/cfg-parsers/llvm-ir.ts b/lib/cfg/cfg-parsers/llvm-ir.ts index 05bacf14f..c32130e9b 100644 --- a/lib/cfg/cfg-parsers/llvm-ir.ts +++ b/lib/cfg/cfg-parsers/llvm-ir.ts @@ -38,7 +38,7 @@ export class LlvmIrCfgParser extends BaseCFGParser { labelReference: RegExp; static override get key() { - return 'llvmir'; + return 'llvm'; } constructor(instructionSetInfo: BaseInstructionSetInfo) { diff --git a/lib/cfg/cfg.ts b/lib/cfg/cfg.ts index d06b513c0..f1d33128e 100644 --- a/lib/cfg/cfg.ts +++ b/lib/cfg/cfg.ts @@ -51,8 +51,8 @@ export type CFG = { export function generateStructure(compilerInfo: CompilerInfo, asmArr: AssemblyLine[], isLlvmIr: boolean) { // figure out what we're working with - const isa = isLlvmIr ? 'llvmir' : compilerInfo.instructionSet; - const compilerGroup = isLlvmIr ? 'llvmir' : isLLVMBased(compilerInfo) ? 'clang' : compilerInfo.group; + const isa = isLlvmIr ? 'llvm' : compilerInfo.instructionSet; + const compilerGroup = isLlvmIr ? 'llvm' : isLLVMBased(compilerInfo) ? 'clang' : compilerInfo.group; const instructionSet = new (getInstructionSetByKey(isa ?? 'base'))(); const parser = new (getParserByKey(compilerGroup))(instructionSet); diff --git a/lib/cfg/instruction-sets/_all.ts b/lib/cfg/instruction-sets/_all.ts index 1e542d7c1..e3a439c4c 100644 --- a/lib/cfg/instruction-sets/_all.ts +++ b/lib/cfg/instruction-sets/_all.ts @@ -22,4 +22,6 @@ // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE // POSSIBILITY OF SUCH DAMAGE. +export {ArmInstructionSetInfo} from './arm.js'; +export {BaseInstructionSetInfo} from './base.js'; export {LlvmIrInstructionSetInfo} from './llvm-ir.js'; diff --git a/lib/cfg/instruction-sets/arm.ts b/lib/cfg/instruction-sets/arm.ts new file mode 100644 index 000000000..2c54835d1 --- /dev/null +++ b/lib/cfg/instruction-sets/arm.ts @@ -0,0 +1,105 @@ +// Copyright (c) 2023, Compiler Explorer Authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. + +import {InstructionSet} from '../../../types/instructionsets.js'; +import {BaseInstructionSetInfo, InstructionType} from './base.js'; + +export class ArmInstructionSetInfo extends BaseInstructionSetInfo { + static conditions = `(?:${[ + 'eq', + 'ne', + 'cs', + 'hs', + 'cc', + 'lo', + 'mi', + 'pl', + 'vs', + 'vc', + 'hi', + 'ls', + 'ge', + 'lt', + 'gt', + 'le', + 'al', + ].join('|')})`; + // handling: + // bcc label + // bxcc reg + // popcc {..., pc} + // popcc {..., tmp}; bxcc tmp + // mov pc, reg + // currently not handling: + // blcc label + // blxcc label + // blxcc reg + // movcc pc, reg + static conditionalJumps = new RegExp( + '\\b(?:' + + [ + `b${ArmInstructionSetInfo.conditions}(?:\\.w)?`, + `bx${ArmInstructionSetInfo.conditions}`, + `bxj${ArmInstructionSetInfo.conditions}`, + `cbz`, + `cbnz`, + ] + .map(re => `(?:${re})`) + .join('|') + + ')\\b', + ); + static unconditionalJumps = new RegExp( + '\\b(?:' + [`b(?:\\.w)?`, `bx`, `bxj`].map(re => `(?:${re})`).join('|') + ')\\b', + ); + static returnInstruction = new RegExp( + '(?:' + + [`bx`, `ret`].map(re => `(?:${re})`).join('|') + + ')\\b.+' + + `|pop\\s*\\{(?:r(?:\\d{2,}|[4-9]),\\s*)*pc\\}.+` + + `|mov\\s*pc\\s*,.+`, + ); + + static override get key(): InstructionSet[] { + return ['arm32', 'aarch64']; + } + + override isJmpInstruction(instruction: string) { + const opcode = instruction.trim().split(' ')[0].toLowerCase(); + return ( + !!opcode.match(ArmInstructionSetInfo.conditionalJumps) || + !!opcode.match(ArmInstructionSetInfo.unconditionalJumps) + ); + } + + override getInstructionType(instruction: string) { + const opcode = instruction.trim().split(' ')[0].toLowerCase(); + if (opcode.match(ArmInstructionSetInfo.unconditionalJumps)) return InstructionType.jmp; + else if (opcode.match(ArmInstructionSetInfo.conditionalJumps)) return InstructionType.conditionalJmpInst; + else if (instruction.trim().toLocaleLowerCase().match(ArmInstructionSetInfo.returnInstruction)) { + return InstructionType.retInst; + } else { + return InstructionType.notRetInst; + } + } +} diff --git a/lib/cfg/instruction-sets/base.ts b/lib/cfg/instruction-sets/base.ts index 57fe1dc31..203e93e3e 100644 --- a/lib/cfg/instruction-sets/base.ts +++ b/lib/cfg/instruction-sets/base.ts @@ -22,6 +22,8 @@ // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE // POSSIBILITY OF SUCH DAMAGE. +import {InstructionSet} from '../../../types/instructionsets.js'; + export enum InstructionType { jmp, conditionalJmpInst, @@ -30,12 +32,12 @@ export enum InstructionType { } export class BaseInstructionSetInfo { - static get key() { + static get key(): 'base' | InstructionSet | InstructionSet[] { return 'base'; } isJmpInstruction(x: string) { - return x.trim()[0] === 'j' || x.match(/\bb\.*(eq|ne|cs|hs|cc|lo|hi|ls|ge|lt|gt|le|rge|rlt)?\b/); + return x.trim()[0] === 'j' || !!x.match(/\bb\.*(eq|ne|cs|hs|cc|lo|hi|ls|ge|lt|gt|le|rge|rlt)?\b/); } getInstructionType(inst: string) { diff --git a/lib/cfg/instruction-sets/llvm-ir.ts b/lib/cfg/instruction-sets/llvm-ir.ts index 85c4c2ed8..3794d904f 100644 --- a/lib/cfg/instruction-sets/llvm-ir.ts +++ b/lib/cfg/instruction-sets/llvm-ir.ts @@ -22,14 +22,15 @@ // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE // POSSIBILITY OF SUCH DAMAGE. +import {InstructionSet} from '../../../types/instructionsets.js'; import {BaseInstructionSetInfo, InstructionType} from './base.js'; export class LlvmIrInstructionSetInfo extends BaseInstructionSetInfo { - static override get key() { - return 'llvmir'; + static override get key(): InstructionSet { + return 'llvm'; } - override isJmpInstruction(x: string): null { + override isJmpInstruction(x: string): boolean { throw Error('Not implemented'); } diff --git a/lib/compilers/c3c.ts b/lib/compilers/c3c.ts index fbe4176a2..a812583ae 100644 --- a/lib/compilers/c3c.ts +++ b/lib/compilers/c3c.ts @@ -15,8 +15,16 @@ export class C3Compiler extends BaseCompiler { } override optionsForFilter(filters: ParseFiltersAndOutputOptions, outputFilename: string) { - return ['compile-only', '-g', '-l', 'pthread', '--no-strip-unused', '--no-obj', '--no-emit-stdlib', - '--emit-asm']; + return [ + 'compile-only', + '-g', + '-l', + 'pthread', + '--no-strip-unused', + '--no-obj', + '--no-emit-stdlib', + '--emit-asm', + ]; } override getIrOutputFilename(inputFilename: string): string { diff --git a/lib/keyed-type.interfaces.ts b/lib/keyed-type.interfaces.ts index b2e131aaa..330423769 100644 --- a/lib/keyed-type.interfaces.ts +++ b/lib/keyed-type.interfaces.ts @@ -23,5 +23,5 @@ // POSSIBILITY OF SUCH DAMAGE. export interface Keyable { - get key(): string; + get key(): string | string[]; } diff --git a/lib/keyed-type.ts b/lib/keyed-type.ts index ea6526a48..000848e68 100644 --- a/lib/keyed-type.ts +++ b/lib/keyed-type.ts @@ -32,20 +32,24 @@ function makeKeyMap(typeName: string, objects: Record