mirror of
https://github.com/compiler-explorer/compiler-explorer.git
synced 2025-12-27 09:23:52 -05:00
Demangle optimization (#2390)
Uses a prefix tree to find replacements in each line, instead of looping over all replacements for all lines. Adds a large test (which was part of bug 1336). On my machine it used to take 45s to process that test, now it takes ~1.5s. Skip demangle replacement entirely if there's no demangling to be done. CC @apmorton Co-authored-by: partouf <partouf@gmail.com>
This commit is contained in:
@@ -27,12 +27,9 @@ import { logger } from '../logger';
|
||||
import { SymbolStore } from '../symbol-store';
|
||||
import * as utils from '../utils';
|
||||
|
||||
import { PrefixTree } from './prefix-tree';
|
||||
|
||||
export class BaseDemangler extends AsmRegex {
|
||||
/**
|
||||
*
|
||||
* @param {string} demanglerExe
|
||||
* @param {BaseCompiler} compiler
|
||||
*/
|
||||
constructor(demanglerExe, compiler) {
|
||||
super();
|
||||
|
||||
@@ -60,11 +57,13 @@ export class BaseDemangler extends AsmRegex {
|
||||
|
||||
// Iterates over the labels, demangle the label names and updates the start and
|
||||
// end position of the label.
|
||||
demangleLabels(labels, value, newValue) {
|
||||
demangleLabels(labels, tree) {
|
||||
if (!Array.isArray(labels) || labels.length === 0) return;
|
||||
|
||||
labels.forEach((label, index) => {
|
||||
if (label.name === value) {
|
||||
const value = label.name;
|
||||
const newValue = tree.findExact(value);
|
||||
if (newValue) {
|
||||
label.name = newValue;
|
||||
label.range.endCol = label.range.startCol + newValue.length;
|
||||
|
||||
@@ -88,34 +87,27 @@ export class BaseDemangler extends AsmRegex {
|
||||
}
|
||||
}
|
||||
|
||||
addMatchToOtherSymbols(matches) {
|
||||
if (!matches) return false;
|
||||
|
||||
const midx = matches.length - 1;
|
||||
this.othersymbols.add(matches[midx], matches[midx]);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
collectLabels() {
|
||||
const symbolMatchers = [
|
||||
this.jumpDef,
|
||||
this.callPtrDef4, this.callPtrDef3, this.callPtrDef2, this.callPtrDef1,
|
||||
this.callDef,
|
||||
this.movUnderscoreDef, this.leaUnderscoreDef, this.quadUnderscoreDef,
|
||||
];
|
||||
for (let j = 0; j < this.result.asm.length; ++j) {
|
||||
const line = this.result.asm[j].text;
|
||||
|
||||
let matches = line.match(this.labelDef);
|
||||
if (matches) {
|
||||
const midx = matches.length - 1;
|
||||
this.symbolstore.add(matches[midx], matches[midx]);
|
||||
}
|
||||
const labelMatch = line.match(this.labelDef);
|
||||
if (labelMatch)
|
||||
this.symbolstore.add(labelMatch[labelMatch.length - 1]);
|
||||
|
||||
if (this.addMatchToOtherSymbols(line.match(this.jumpDef))) continue;
|
||||
if (this.addMatchToOtherSymbols(line.match(this.callPtrDef4))) continue;
|
||||
if (this.addMatchToOtherSymbols(line.match(this.callPtrDef3))) continue;
|
||||
if (this.addMatchToOtherSymbols(line.match(this.callPtrDef2))) continue;
|
||||
if (this.addMatchToOtherSymbols(line.match(this.callPtrDef1))) continue;
|
||||
if (this.addMatchToOtherSymbols(line.match(this.callDef))) continue;
|
||||
if (this.addMatchToOtherSymbols(line.match(this.movUnderscoreDef))) continue;
|
||||
if (this.addMatchToOtherSymbols(line.match(this.leaUnderscoreDef))) continue;
|
||||
if (this.addMatchToOtherSymbols(line.match(this.quadUnderscoreDef))) continue;
|
||||
for (const reToMatch of symbolMatchers) {
|
||||
const matches = line.match(reToMatch);
|
||||
if (matches) {
|
||||
this.othersymbols.add(matches[matches.length - 1]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
this.othersymbols.exclude(this.symbolstore);
|
||||
@@ -133,11 +125,6 @@ export class BaseDemangler extends AsmRegex {
|
||||
return [];
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param {string} symbol
|
||||
* @param {string} translation
|
||||
*/
|
||||
addTranslation(symbol, translation) {
|
||||
if (this.includeMetadata) {
|
||||
translation += this.getMetadata(symbol).map((meta) => ' [' + meta.description + ']').join();
|
||||
@@ -162,18 +149,16 @@ export class BaseDemangler extends AsmRegex {
|
||||
|
||||
const translations = [...this.symbolstore.listTranslations(), ...this.othersymbols.listTranslations()]
|
||||
.filter(elem => elem[0] !== elem[1]);
|
||||
if (translations.length > 0) {
|
||||
const tree = new PrefixTree(translations);
|
||||
|
||||
for (const asm of this.result.asm) {
|
||||
let line = asm.text;
|
||||
for (const [oldValue, newValue] of translations) {
|
||||
line = utils.replaceAll(line, oldValue, newValue);
|
||||
this.demangleLabels(asm.labels, oldValue, newValue);
|
||||
for (const asm of this.result.asm) {
|
||||
asm.text = tree.replaceAll(asm.text);
|
||||
this.demangleLabels(asm.labels, tree);
|
||||
}
|
||||
asm.text = line;
|
||||
|
||||
this.demangleLabelDefinitions(this.result.labelDefinitions, translations);
|
||||
}
|
||||
|
||||
this.demangleLabelDefinitions(this.result.labelDefinitions, translations);
|
||||
|
||||
return this.result;
|
||||
}
|
||||
|
||||
@@ -188,7 +173,7 @@ export class BaseDemangler extends AsmRegex {
|
||||
}
|
||||
|
||||
async process(result, execOptions) {
|
||||
let options = execOptions || {};
|
||||
const options = execOptions || {};
|
||||
this.result = result;
|
||||
|
||||
if (!this.symbolstore) {
|
||||
|
||||
107
lib/demangler/prefix-tree.js
Normal file
107
lib/demangler/prefix-tree.js
Normal file
@@ -0,0 +1,107 @@
|
||||
// Copyright (c) 2021, Compiler Explorer Authors
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
// A prefix tree, really a trie, but I find the name annoyingly pompous, and
|
||||
// as it's pronounced the same way as "tree", super confusing.
|
||||
// Essentially we have an N-way tree, for N possible ASCII characters. Each
|
||||
// mapping is added to the tree, and the terminal nodes (that code for an actual
|
||||
// match) have an additional 'result' entry for their result.
|
||||
// * It's linear in the number of entries to build (though it's a super high
|
||||
// fan out tree, so RAM usage is pretty bad, and cache locality poor).
|
||||
// * It's linear in the length of a match to find the longest prefix, or a match.
|
||||
// It's the "find longest prefix" performance characteristic that we want for the
|
||||
// demangler.
|
||||
export class PrefixTree {
    /**
     * Builds a prefix tree, optionally pre-populated with mappings.
     *
     * @param {Iterable<[string, string]>} [mappings] - optional `[from, to]` pairs to add
     */
    constructor(mappings) {
        // Each node is a sparse array indexed by UTF-16 code unit; terminal
        // nodes additionally carry a `result` property holding the replacement.
        this.root = [];
        if (mappings) {
            for (const [from, to] of mappings)
                this.add(from, to);
        }
    }

    /**
     * Adds (or overwrites) the mapping `from` -> `to`.
     *
     * @param {string} from - text to look for
     * @param {string} to - replacement text; a falsy value (e.g. '') is stored
     *     but is treated as "no mapping" by the lookup methods
     */
    add(from, to) {
        let node = this.root;
        for (let i = 0; i < from.length; ++i) {
            const character = from.charCodeAt(i);
            if (!node[character])
                node[character] = [];
            node = node[character];
        }
        node.result = to;
    }

    /**
     * Finds the longest mapping that matches `needle` beginning at `start`.
     * Walks the tree until a mismatch or the end of the input, remembering
     * the most recent terminal node seen along the way.
     *
     * @param {string} needle - text to search in
     * @param {number} [start=0] - non-negative index in `needle` at which the match must begin
     * @returns {[?string, ?string]} `[matchedText, replacement]`, or `[null, null]` if nothing matches
     */
    findLongestMatch(needle, start = 0) {
        let node = this.root;
        let matchEnd = -1;
        let replacement = null;
        for (let i = start; i < needle.length; ++i) {
            node = node[needle.charCodeAt(i)];
            if (!node)
                break;
            if (node.result) {
                // Only remember where the match ends; the matched text is
                // sliced out once at the end rather than on every hit.
                matchEnd = i + 1;
                replacement = node.result;
            }
        }
        if (matchEnd < 0)
            return [null, null];
        return [needle.slice(start, matchEnd), replacement];
    }

    /**
     * Looks up the replacement for a string that matches a mapping exactly.
     *
     * @param {string} needle - complete text to look up
     * @returns {?string} the replacement, or null when there is no exact match
     */
    findExact(needle) {
        let node = this.root;
        for (let i = 0; i < needle.length && node; ++i)
            node = node[needle.charCodeAt(i)];
        if (node && node.result)
            return node.result;
        return null;
    }

    /**
     * Replaces all matches (longest match first) in a line.
     *
     * @param {string} line - text to process
     * @returns {string} `line` with every mapped occurrence replaced
     */
    replaceAll(line) {
        let result = '';
        let index = 0;
        // Start of the current run of text that matched nothing; the run is
        // copied in one go when a match (or the end of the line) is reached.
        let unmatchedStart = 0;
        // Try each possible replacement point in the line. The tree is walked
        // directly over `line` from `index`, so no suffix string is created
        // per position.
        while (index < line.length) {
            const [oldValue, newValue] = this.findLongestMatch(line, index);
            if (oldValue) {
                // We found a replacement: flush the pending unmatched text,
                // emit the replacement, and skip past the matched text.
                result += line.slice(unmatchedStart, index) + newValue;
                index += oldValue.length;
                unmatchedStart = index;
            } else {
                // No match here; keep looking from the next character.
                index++;
            }
        }
        return result + line.slice(unmatchedStart);
    }
}
|
||||
20000
test/demangle-cases/bug-1336-first-20000-lines.asm
Normal file
20000
test/demangle-cases/bug-1336-first-20000-lines.asm
Normal file
File diff suppressed because it is too large
Load Diff
20000
test/demangle-cases/bug-1336-first-20000-lines.asm.demangle
Normal file
20000
test/demangle-cases/bug-1336-first-20000-lines.asm.demangle
Normal file
File diff suppressed because it is too large
Load Diff
6
test/demangle-cases/no-demangling.asm
Normal file
6
test/demangle-cases/no-demangling.asm
Normal file
@@ -0,0 +1,6 @@
|
||||
test:
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
nop
|
||||
pop rbp
|
||||
ret
|
||||
6
test/demangle-cases/no-demangling.asm.demangle
Normal file
6
test/demangle-cases/no-demangling.asm.demangle
Normal file
@@ -0,0 +1,6 @@
|
||||
test:
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
nop
|
||||
pop rbp
|
||||
ret
|
||||
26
test/demangle-cases/similar-symbols.asm
Normal file
26
test/demangle-cases/similar-symbols.asm
Normal file
@@ -0,0 +1,26 @@
|
||||
_Z2aai:
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
mov DWORD PTR [rbp-4], edi
|
||||
nop
|
||||
pop rbp
|
||||
ret
|
||||
_Z2aaii:
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
mov DWORD PTR [rbp-4], edi
|
||||
mov DWORD PTR [rbp-8], esi
|
||||
nop
|
||||
pop rbp
|
||||
ret
|
||||
main:
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
mov esi, 1
|
||||
mov edi, 0
|
||||
call _Z2aaii
|
||||
mov edi, 1
|
||||
call _Z2aai
|
||||
mov eax, 0
|
||||
pop rbp
|
||||
ret
|
||||
26
test/demangle-cases/similar-symbols.asm.demangle
Normal file
26
test/demangle-cases/similar-symbols.asm.demangle
Normal file
@@ -0,0 +1,26 @@
|
||||
aa(int):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
mov DWORD PTR [rbp-4], edi
|
||||
nop
|
||||
pop rbp
|
||||
ret
|
||||
aa(int, int):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
mov DWORD PTR [rbp-4], edi
|
||||
mov DWORD PTR [rbp-8], esi
|
||||
nop
|
||||
pop rbp
|
||||
ret
|
||||
main:
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
mov esi, 1
|
||||
mov edi, 0
|
||||
call aa(int, int)
|
||||
mov edi, 1
|
||||
call aa(int)
|
||||
mov eax, 0
|
||||
pop rbp
|
||||
ret
|
||||
@@ -23,11 +23,12 @@
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
import { CppDemangler, Win32Demangler } from '../lib/demangler';
|
||||
import { PrefixTree } from '../lib/demangler/prefix-tree';
|
||||
import * as exec from '../lib/exec';
|
||||
import { SymbolStore } from '../lib/symbol-store';
|
||||
import * as utils from '../lib/utils';
|
||||
|
||||
import { fs, path, resolvePathFromTestRoot } from './utils';
|
||||
import { chai, fs, path, resolvePathFromTestRoot } from './utils';
|
||||
|
||||
const cppfiltpath = 'c++filt';
|
||||
|
||||
@@ -256,7 +257,7 @@ async function DoDemangleTest(filename) {
|
||||
|
||||
const demangler = new CppDemangler(cppfiltpath, new DummyCompiler());
|
||||
demangler.demanglerArguments = ['-n'];
|
||||
await demangler.process(resultIn).should.eventually.deep.equal(resultOut);
|
||||
return demangler.process(resultIn).should.eventually.deep.equal(resultOut);
|
||||
}
|
||||
|
||||
describe('File demangling', () => {
|
||||
@@ -282,3 +283,45 @@ describe('File demangling', () => {
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// Unit tests for PrefixTree: longest-match replacement across a line and
// exact lookups. Every test title is unique so a failure report identifies
// exactly one case.
describe('Demangler prefix tree', () => {
    // Shared fixture: 'a' and 'aa' overlap so longest-match selection is
    // exercised; 'aa_shouldnotmatch' is only ever hit by the exact lookup.
    const replacements = new PrefixTree();
    replacements.add('a', 'short_a');
    replacements.add('aa', 'long_a');
    replacements.add('aa_shouldnotmatch', 'ERROR');
    it('should replace a short match', () => {
        replacements.replaceAll('a').should.eq('short_a');
    });
    it('should replace using the longest match', () => {
        replacements.replaceAll('aa').should.eq('long_a');
    });
    it('should replace using both', () => {
        replacements.replaceAll('aaa').should.eq('long_ashort_a');
    });
    it('should replace using both when matches are separated', () => {
        replacements.replaceAll('a aa a aa').should.eq('short_a long_a short_a long_a');
    });
    it('should work with empty replacements', () => {
        new PrefixTree().replaceAll('Testing 123').should.eq('Testing 123');
    });
    it('should leave unmatching text alone', () => {
        replacements.replaceAll('Some text with none of the first letter of the ordered letter list')
            .should.eq('Some text with none of the first letter of the ordered letter list');
    });
    it('should handle a mixture', () => {
        replacements.replaceAll('Everyone loves an aardvark')
            .should.eq('Everyone loves short_an long_ardvshort_ark');
    });
    it('should find exact matches', () => {
        replacements.findExact('a').should.eq('short_a');
        replacements.findExact('aa').should.eq('long_a');
        replacements.findExact('aa_shouldnotmatch').should.eq('ERROR');
    });
    it('should not find mismatches', () => {
        chai.expect(replacements.findExact('aaa')).to.be.null;
        chai.expect(replacements.findExact(' aa')).to.be.null;
        chai.expect(replacements.findExact(' a')).to.be.null;
        chai.expect(replacements.findExact('Oh noes')).to.be.null;
        chai.expect(replacements.findExact('')).to.be.null;
    });
});
|
||||
|
||||
Reference in New Issue
Block a user