mirror of
https://github.com/compiler-explorer/compiler-explorer.git
synced 2025-12-27 09:23:52 -05:00
87 lines
3.4 KiB
TypeScript
87 lines
3.4 KiB
TypeScript
import {Document} from 'domhandler';
|
|
import {load, Cheerio, CheerioAPI} from 'cheerio';
|
|
import {readFile} from 'fs/promises';
|
|
|
|
// Relative path of the LLVM Language Reference HTML file that this script reads and parses.
const LANGREF_PATH = './vendor/LangRef.html';
|
|
|
|
/** Documentation collected for a single LLVM instruction. */
interface InstructionInfo {
    /** Instruction name, with any trailing `-to` removed. */
    name: string;
    /** Link to the instruction's section in the online LangRef. */
    url: string;
    /** HTML of the instruction's section, with the `¶` permalink anchors removed. */
    html: string;
    /** Text of the paragraphs found under the section's "Overview" heading. */
    tooltip: string;
}
|
|
|
|
/**
 * Retrieves the list of all LLVM instruction names.
 *
 * Starts from the first link pointing at `#instruction-reference`, walks the
 * nested list that follows the link's parent, and builds one name per `<code>`
 * entry by joining the text of its `<span>` children with `-`.
 *
 * @param root - Root of the parsed LangRef document.
 * @param $ - Cheerio API bound to the same document.
 * @returns The distinct names in first-seen order, excluding `..` and `to`.
 */
const getInstructionList = (root: Cheerio<Document>, $: CheerioAPI) => {
    // Do any name-to-anchor rewrites here
    const toAnchorPart = (part: string) => (part === 'va_arg' ? 'va-arg' : part);

    const tocLink = root.find('[href="\\#instruction-reference"]').first();
    const codeNodes = tocLink.parent().find('+ ul > li > ul > li > p > a > code').toArray();

    // A Set keeps only the first occurrence of each name while preserving order.
    const unique = new Set<string>();
    for (const codeNode of codeNodes) {
        const parts = $(codeNode)
            .find('span')
            .toArray()
            .map((span) => $(span).text());
        const joined = parts
            .filter((part) => part !== '..')
            .map(toAnchorPart)
            .join('-');
        unique.add(joined);
    }

    return Array.from(unique).filter((name) => name !== '..' && name !== 'to');
};
|
|
|
|
/**
 * Crawls information about one LLVM instruction.
 *
 * @param instruction - Instruction name used to derive the `#<name>-instruction` anchor id;
 *     every space in it is replaced by `-`.
 * @param root - Root of the parsed LangRef document.
 * @param $ - Cheerio API bound to the same document.
 * @returns The instruction's display name, LangRef URL, section HTML and overview text.
 * @throws Error if the document has no element with HTML content for the derived anchor id.
 */
const getInstructionInfo = (instruction: string, root: Cheerio<Document>, $: CheerioAPI): InstructionInfo => {
    // Derive the anchor id once so the element lookup and the published URL
    // always point at the same section.
    const anchorId = `${instruction.replaceAll(' ', '-')}-instruction`;
    const anchor$ = root.find(`#${anchorId}`).first();
    const url = `https://llvm.org/docs/LangRef.html#${anchorId}`;

    const overviewHeaders$ = anchor$.find('h5').filter((_, el) => $(el).text().startsWith('Overview'));
    const overviewPars$ = overviewHeaders$.parent().find('> p');

    // html() yields null when the selection is empty; fail with a message that
    // names the missing section instead of passing null on to load().
    const sectionHtml = anchor$.html();
    if (sectionHtml === null) {
        throw new Error(`No LangRef section found for instruction '${instruction}' (expected element #${anchorId})`);
    }

    // Load the HTML content of the anchor element into a new Cheerio instance
    const myhtml$ = load(sectionHtml);

    // Find and remove all <a> elements with text equal to '¶', e.g.
    // <a class="headerlink" href="#id210" title="Permalink to this heading">¶</a>
    myhtml$('a')
        .filter((_, el) => myhtml$(el).text().trim() === '¶')
        .remove();

    // Extract the modified HTML content
    const modifiedHtml = myhtml$.html();

    // Instructions that have '-to' in their anchors are likely conversion
    // instructions. The trailing '-to' has to be trimmed, as otherwise
    // operations like 'fptrunc' won't find a match in the documentation.
    const name = instruction.replace(/-to$/, '');

    return {
        url,
        name,
        html: modifiedHtml,
        tooltip: overviewPars$.text(),
    };
};
|
|
|
|
// Script body: parse the LangRef HTML, collect the documentation for every
// instruction, and print a generated TypeScript module to stdout.
const contents = await readFile(LANGREF_PATH, 'utf8');
const $ = load(contents);

const names = getInstructionList($.root(), $);
const info = names.map((x) => getInstructionInfo(x, $.root(), $));

/**
 * Escapes text for embedding inside a generated template literal.
 * Backslashes are escaped first so the escapes added for backticks and `${`
 * are not themselves doubled.
 */
const escapeForTemplateLiteral = (text: string): string =>
    text.replaceAll('\\', '\\\\').replaceAll('`', '\\`').replaceAll('${', '\\${');

console.log('import type {AssemblyInstructionInfo} from \'../../../types/assembly-docs.interfaces.js\';');
console.log('');
console.log('export function getAsmOpcode(opcode: string | undefined): AssemblyInstructionInfo | undefined {');
console.log(' if (!opcode) return;');
console.log(' switch (opcode.toUpperCase()) {');

// One `case` per instruction; the generated switch is matched against the upper-cased opcode.
for (const instruction of info) {
    console.log(` case '${instruction.name.toUpperCase()}':`);
    console.log(' return {');
    console.log(` url: \`${instruction.url}\`,`);
    console.log(` html: \`${escapeForTemplateLiteral(instruction.html)}\`,`);
    // Newlines are dropped from the tooltip before it is escaped.
    console.log(` tooltip: \`${escapeForTemplateLiteral(instruction.tooltip.replaceAll('\n', ''))}\`,`);
    console.log(' };');
}

console.log(' }');
console.log('}');
|