Demangle optimization (#2390)

Uses a prefix tree to find replacements in each
line, instead of looping over all replacements for all lines.
Adds a large test (which was part of bug 1336). On my machine it
used to take 45s to process that test, now it takes ~1.5s.

Skip demangle replacement entirely if there's no demangling to be done. CC @apmorton

Co-authored-by: partouf <partouf@gmail.com>
This commit is contained in:
Matt Godbolt
2021-01-18 11:33:19 -06:00
committed by GitHub
parent c42ffe540b
commit a9d76dd0df
9 changed files with 40246 additions and 47 deletions

View File

@@ -27,12 +27,9 @@ import { logger } from '../logger';
import { SymbolStore } from '../symbol-store';
import * as utils from '../utils';
import { PrefixTree } from './prefix-tree';
export class BaseDemangler extends AsmRegex {
/**
*
* @param {string} demanglerExe
* @param {BaseCompiler} compiler
*/
constructor(demanglerExe, compiler) {
super();
@@ -60,11 +57,13 @@ export class BaseDemangler extends AsmRegex {
// Iterates over the labels, demangle the label names and updates the start and
// end position of the label.
demangleLabels(labels, value, newValue) {
demangleLabels(labels, tree) {
if (!Array.isArray(labels) || labels.length === 0) return;
labels.forEach((label, index) => {
if (label.name === value) {
const value = label.name;
const newValue = tree.findExact(value);
if (newValue) {
label.name = newValue;
label.range.endCol = label.range.startCol + newValue.length;
@@ -88,34 +87,27 @@ export class BaseDemangler extends AsmRegex {
}
}
addMatchToOtherSymbols(matches) {
if (!matches) return false;
const midx = matches.length - 1;
this.othersymbols.add(matches[midx], matches[midx]);
return true;
}
collectLabels() {
const symbolMatchers = [
this.jumpDef,
this.callPtrDef4, this.callPtrDef3, this.callPtrDef2, this.callPtrDef1,
this.callDef,
this.movUnderscoreDef, this.leaUnderscoreDef, this.quadUnderscoreDef,
];
for (let j = 0; j < this.result.asm.length; ++j) {
const line = this.result.asm[j].text;
let matches = line.match(this.labelDef);
if (matches) {
const midx = matches.length - 1;
this.symbolstore.add(matches[midx], matches[midx]);
}
const labelMatch = line.match(this.labelDef);
if (labelMatch)
this.symbolstore.add(labelMatch[labelMatch.length - 1]);
if (this.addMatchToOtherSymbols(line.match(this.jumpDef))) continue;
if (this.addMatchToOtherSymbols(line.match(this.callPtrDef4))) continue;
if (this.addMatchToOtherSymbols(line.match(this.callPtrDef3))) continue;
if (this.addMatchToOtherSymbols(line.match(this.callPtrDef2))) continue;
if (this.addMatchToOtherSymbols(line.match(this.callPtrDef1))) continue;
if (this.addMatchToOtherSymbols(line.match(this.callDef))) continue;
if (this.addMatchToOtherSymbols(line.match(this.movUnderscoreDef))) continue;
if (this.addMatchToOtherSymbols(line.match(this.leaUnderscoreDef))) continue;
if (this.addMatchToOtherSymbols(line.match(this.quadUnderscoreDef))) continue;
for (const reToMatch of symbolMatchers) {
const matches = line.match(reToMatch);
if (matches) {
this.othersymbols.add(matches[matches.length - 1]);
break;
}
}
}
this.othersymbols.exclude(this.symbolstore);
@@ -133,11 +125,6 @@ export class BaseDemangler extends AsmRegex {
return [];
}
/**
*
* @param {string} symbol
* @param {string} translation
*/
addTranslation(symbol, translation) {
if (this.includeMetadata) {
translation += this.getMetadata(symbol).map((meta) => ' [' + meta.description + ']').join();
@@ -162,18 +149,16 @@ export class BaseDemangler extends AsmRegex {
const translations = [...this.symbolstore.listTranslations(), ...this.othersymbols.listTranslations()]
.filter(elem => elem[0] !== elem[1]);
if (translations.length > 0) {
const tree = new PrefixTree(translations);
for (const asm of this.result.asm) {
let line = asm.text;
for (const [oldValue, newValue] of translations) {
line = utils.replaceAll(line, oldValue, newValue);
this.demangleLabels(asm.labels, oldValue, newValue);
for (const asm of this.result.asm) {
asm.text = tree.replaceAll(asm.text);
this.demangleLabels(asm.labels, tree);
}
asm.text = line;
this.demangleLabelDefinitions(this.result.labelDefinitions, translations);
}
this.demangleLabelDefinitions(this.result.labelDefinitions, translations);
return this.result;
}
@@ -188,7 +173,7 @@ export class BaseDemangler extends AsmRegex {
}
async process(result, execOptions) {
let options = execOptions || {};
const options = execOptions || {};
this.result = result;
if (!this.symbolstore) {

View File

@@ -0,0 +1,107 @@
// Copyright (c) 2021, Compiler Explorer Authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
// A prefix tree, really a trie, but I find the name annoyingly pompous, and
// as it's pronounced the same way as "tree", super confusing.
// Essentially we have an N-way tree, for N possible ASCII characters. Each
// mapping is added to the tree, and the terminal nodes (that code for an actual
// match) have an additional 'result' entry for their result.
// * It's linear in the number of entries to build (though it's a super high
// fan out tree, so RAM usage is pretty bad, and cache locality poor).
// * It's linear in the length of a match to find the longest prefix, or a match.
// It's the "find longest prefix" performance characteristic that we want for the
// demangler.
export class PrefixTree {
    // Builds the tree from an optional iterable of [from, to] pairs. With no
    // mappings the tree is empty and replaceAll() returns its input unchanged.
    constructor(mappings) {
        // Each node is a sparse array indexed by character code; a node that
        // terminates a mapping additionally carries a 'result' property.
        this.root = [];
        if (mappings) {
            for (const [from, to] of mappings)
                this.add(from, to);
        }
    }

    // Registers `to` as the replacement for the exact string `from`, creating
    // any missing intermediate nodes. Adding the same `from` twice overwrites
    // the earlier result.
    add(from, to) {
        let node = this.root;
        for (let i = 0; i < from.length; ++i) {
            const character = from.charCodeAt(i);
            if (!node[character])
                node[character] = [];
            node = node[character];
        }
        node.result = to;
    }

    // Finds the longest possible match by walking along the N-way tree until we
    // mismatch or reach the end of the input string. Along the way, we note the
    // most recent match (if any), which will be our return value.
    //
    // `start` (default 0) is the offset in `needle` at which matching begins, so
    // callers scanning a long line need not copy the remainder of the line for
    // every candidate position.
    //
    // Returns [matchedText, replacement], or [null, null] if nothing matched.
    findLongestMatch(needle, start = 0) {
        let node = this.root;
        let matchEnd = -1;
        let matchResult = null;
        for (let i = start; i < needle.length; ++i) {
            node = node[needle.charCodeAt(i)];
            if (!node)
                break;
            // Compare against null/undefined rather than truthiness so that a
            // mapping to the empty string still counts as a match.
            if (node.result !== undefined && node.result !== null) {
                matchEnd = i + 1;
                matchResult = node.result;
            }
        }
        if (matchEnd < 0)
            return [null, null];
        // Only materialise the matched text once, for the final (longest) match.
        return [needle.substring(start, matchEnd), matchResult];
    }

    // Returns the replacement registered for exactly `needle`, or null if there
    // is no such mapping (a mere prefix or extension of a mapping is not a hit).
    findExact(needle) {
        let node = this.root;
        for (let i = 0; i < needle.length && node; ++i)
            node = node[needle.charCodeAt(i)];
        if (node && node.result !== undefined && node.result !== null)
            return node.result;
        return null;
    }

    // Replace all matches (longest match first) in a line.
    replaceAll(line) {
        let result = '';
        let index = 0;
        // Start of the pending run of unmatched characters not yet copied to result.
        let unmatchedStart = 0;
        // Loop over each possible replacement point in the line, walking the
        // prefix tree from that offset to find the longest mapping starting
        // there. If we couldn't find a match, skip on by one character, else
        // use the replacement, and skip by the length of the matched text.
        while (index < line.length) {
            const [oldValue, newValue] = this.findLongestMatch(line, index);
            if (oldValue) {
                // We found a replacement: flush the unmatched run, then emit it.
                result += line.substring(unmatchedStart, index) + newValue;
                index += oldValue.length;
                unmatchedStart = index;
            } else {
                // No match; leave the character in the unmatched run, and keep looking.
                index++;
            }
        }
        return result + line.substring(unmatchedStart);
    }
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,6 @@
test:
push rbp
mov rbp, rsp
nop
pop rbp
ret

View File

@@ -0,0 +1,6 @@
test:
push rbp
mov rbp, rsp
nop
pop rbp
ret

View File

@@ -0,0 +1,26 @@
_Z2aai:
push rbp
mov rbp, rsp
mov DWORD PTR [rbp-4], edi
nop
pop rbp
ret
_Z2aaii:
push rbp
mov rbp, rsp
mov DWORD PTR [rbp-4], edi
mov DWORD PTR [rbp-8], esi
nop
pop rbp
ret
main:
push rbp
mov rbp, rsp
mov esi, 1
mov edi, 0
call _Z2aaii
mov edi, 1
call _Z2aai
mov eax, 0
pop rbp
ret

View File

@@ -0,0 +1,26 @@
aa(int):
push rbp
mov rbp, rsp
mov DWORD PTR [rbp-4], edi
nop
pop rbp
ret
aa(int, int):
push rbp
mov rbp, rsp
mov DWORD PTR [rbp-4], edi
mov DWORD PTR [rbp-8], esi
nop
pop rbp
ret
main:
push rbp
mov rbp, rsp
mov esi, 1
mov edi, 0
call aa(int, int)
mov edi, 1
call aa(int)
mov eax, 0
pop rbp
ret

View File

@@ -23,11 +23,12 @@
// POSSIBILITY OF SUCH DAMAGE.
import { CppDemangler, Win32Demangler } from '../lib/demangler';
import { PrefixTree } from '../lib/demangler/prefix-tree';
import * as exec from '../lib/exec';
import { SymbolStore } from '../lib/symbol-store';
import * as utils from '../lib/utils';
import { fs, path, resolvePathFromTestRoot } from './utils';
import { chai, fs, path, resolvePathFromTestRoot } from './utils';
const cppfiltpath = 'c++filt';
@@ -256,7 +257,7 @@ async function DoDemangleTest(filename) {
const demangler = new CppDemangler(cppfiltpath, new DummyCompiler());
demangler.demanglerArguments = ['-n'];
await demangler.process(resultIn).should.eventually.deep.equal(resultOut);
return demangler.process(resultIn).should.eventually.deep.equal(resultOut);
}
describe('File demangling', () => {
@@ -282,3 +283,45 @@ describe('File demangling', () => {
}
}
});
// Unit tests for PrefixTree: replaceAll() must prefer the longest mapping at
// each position, and findExact() must only report whole-string hits.
describe('Demangler prefix tree', () => {
    // Shared fixture: 'a' and 'aa' overlap so that longest-match behaviour is
    // observable; 'aa_shouldnotmatch' never appears in full in any replaceAll() input below.
    const replacements = new PrefixTree();
    replacements.add('a', 'short_a');
    replacements.add('aa', 'long_a');
    replacements.add('aa_shouldnotmatch', 'ERROR');
    it('should replace a short match', () => {
        replacements.replaceAll('a').should.eq('short_a');
    });
    it('should replace using the longest match', () => {
        replacements.replaceAll('aa').should.eq('long_a');
    });
    // Titles must be unique so a failure report identifies a single test.
    it('should replace using both when matches are adjacent', () => {
        replacements.replaceAll('aaa').should.eq('long_ashort_a');
    });
    it('should replace using both when matches are separated', () => {
        replacements.replaceAll('a aa a aa').should.eq('short_a long_a short_a long_a');
    });
    it('should work with empty replacements', () => {
        new PrefixTree().replaceAll('Testing 123').should.eq('Testing 123');
    });
    it('should leave unmatching text alone', () => {
        replacements.replaceAll('Some text with none of the first letter of the ordered letter list')
            .should.eq('Some text with none of the first letter of the ordered letter list');
    });
    it('should handle a mixture', () => {
        replacements.replaceAll('Everyone loves an aardvark')
            .should.eq('Everyone loves short_an long_ardvshort_ark');
    });
    it('should find exact matches', () => {
        replacements.findExact('a').should.eq('short_a');
        replacements.findExact('aa').should.eq('long_a');
        replacements.findExact('aa_shouldnotmatch').should.eq('ERROR');
    });
    it('should not find mismatches', () => {
        chai.expect(replacements.findExact('aaa')).to.be.null;
        chai.expect(replacements.findExact(' aa')).to.be.null;
        chai.expect(replacements.findExact(' a')).to.be.null;
        chai.expect(replacements.findExact('Oh noes')).to.be.null;
        chai.expect(replacements.findExact('')).to.be.null;
    });
});