mirror of
https://github.com/compiler-explorer/compiler-explorer.git
synced 2025-12-27 10:33:59 -05:00
Fixes test failures in assembly documentation generation. ## Import fixes All docenizers: Change imports from `../base.js` to `../../../types/assembly-docs.interfaces.js` with `import type`. ## AMD64: Fix missing SHR instruction The docenizer only read the first `<table>` in HTML files. Files like `SAL:SAR:SHL:SHR.html` have multiple tables - SHR was in table 2. **Fix**: Read all tables and combine results. ## PTX: Fix broken documentation extraction The PTX website structure changed. The old code looked for navigation links like "Instructions: add, sub, mul" which no longer exist. **Fix**: - Find instructions by scanning `<code>` blocks - Map each to its documentation section - Extract text from Description paragraphs - Ensure common instructions (add, sub) map to their definition sections, not changelogs All asm-docs tests now pass. --------- Co-authored-by: Claude <noreply@anthropic.com>
150 lines
5.2 KiB
Python
Executable File
150 lines
5.2 KiB
Python
Executable File
#! /usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
import argparse
|
|
import json
|
|
import os
|
|
import sys
|
|
import urllib
|
|
import re
|
|
from urllib import request
|
|
from urllib import parse
|
|
|
|
parser = argparse.ArgumentParser(description='Docenizes the EVM documentation')
|
|
parser.add_argument('-i', '--inputfolder', type=str,
|
|
help='Folder where the input files reside as .html. Default is ./evm-inst-docs/',
|
|
default='evm-inst-docs')
|
|
parser.add_argument('-o', '--outputpath', type=str, help='Final path of the .ts file. Default is ./asm-docs-evm.ts',
|
|
default='./asm-docs-evm.ts')
|
|
parser.add_argument('-d', '--downloadfolder', type=str,
|
|
help='Folder where the archive will be downloaded and extracted', default='evm-inst-docs')
|
|
|
|
# | `0x00` | STOP | Halts execution | - | 0 |
|
|
MNEMONIC_RE = re.compile('^\| `0x([A-Za-z0-9]+)` \| (.*) \| .* \| .* \| .* \|$')
|
|
|
|
# Where to extract the asmdoc archive.
|
|
ARCHIVE_DESC_URL = "https://raw.githubusercontent.com/comitylabs/evm.codes/main/opcodes.json"
|
|
ARCHIVE_DESC_NAME = "opcodes.json"
|
|
ARCHIVE_MNEM_URL = "https://raw.githubusercontent.com/crytic/evm-opcodes/master/README.md"
|
|
ARCHIVE_MNEM_NAME = "README.md"
|
|
|
|
|
|
class Instruction(object):
|
|
def __init__(self, opcode, mnemonic, tooltip, body):
|
|
self.opcode = opcode
|
|
self.mnemonic = mnemonic
|
|
self.tooltip = tooltip.rstrip(': ,')
|
|
self.body = body
|
|
|
|
def __str__(self):
|
|
return f"{self.opcode} = {self.tooltip}\n{self.body}"
|
|
|
|
|
|
def get_url_for_instruction(instr):
|
|
return f"https://www.evm.codes/#{urllib.parse.quote(instr.opcode)}"
|
|
|
|
|
|
def download_asm_doc_archive(downloadfolder):
|
|
if not os.path.exists(downloadfolder):
|
|
print(f"Creating {downloadfolder} as download folder")
|
|
os.makedirs(downloadfolder)
|
|
elif not os.path.isdir(downloadfolder):
|
|
print(f"Error: download folder {downloadfolder} is not a directory")
|
|
sys.exit(1)
|
|
archive_desc_name = os.path.join(downloadfolder, ARCHIVE_DESC_NAME)
|
|
print("Downloading archive...")
|
|
urllib.request.urlretrieve(ARCHIVE_DESC_URL, archive_desc_name)
|
|
|
|
archive_mnem_name = os.path.join(downloadfolder, ARCHIVE_MNEM_NAME)
|
|
print("Downloading archive...")
|
|
urllib.request.urlretrieve(ARCHIVE_MNEM_URL, archive_mnem_name)
|
|
|
|
|
|
def get_description_paragraphs(opcode):
|
|
stack_input = 'Input: ' + (f'<code>{opcode["input"]}</code>' if opcode["input"] != "" else '-')
|
|
stack_output = 'Output: ' + (f'<code>{opcode["output"]}</code>' if opcode["output"] != "" else '-')
|
|
return [opcode["description"], stack_input, stack_output]
|
|
|
|
|
|
def generate_opcode_mnemonic_map(mnemonic_file):
|
|
mnemonic_map = {}
|
|
for line in mnemonic_file:
|
|
match = MNEMONIC_RE.match(line)
|
|
if match:
|
|
mnemonic_map[match.group(1)] = match.group(2)
|
|
return mnemonic_map
|
|
|
|
|
|
def is_valid_opcode(opcode, mnemonic_map):
|
|
return opcode in mnemonic_map
|
|
|
|
def parse(descriptions_file, mnemonic_file):
|
|
descriptions = json.load(descriptions_file)
|
|
mnemonic_map = generate_opcode_mnemonic_map(mnemonic_file)
|
|
print(mnemonic_map)
|
|
opcodes = descriptions.items()
|
|
instructions = []
|
|
for opcode, body in opcodes:
|
|
if is_valid_opcode(opcode, mnemonic_map):
|
|
mnemonic = mnemonic_map[opcode]
|
|
opcode_desc = get_description_paragraphs(body)
|
|
instructions.append(Instruction(
|
|
opcode,
|
|
mnemonic,
|
|
opcode_desc[0],
|
|
'\n'.join(opcode_desc))
|
|
)
|
|
return instructions
|
|
|
|
|
|
def parse_html(directory):
|
|
print("Parsing instructions...")
|
|
instructions = []
|
|
try:
|
|
with open(os.path.join(directory, ARCHIVE_DESC_NAME), encoding='utf-8') as description_file:
|
|
with open(os.path.join(directory, ARCHIVE_MNEM_NAME), encoding='utf-8') as mnemonic_file:
|
|
instructions = parse(description_file, mnemonic_file)
|
|
except Exception as e:
|
|
print(f"Error parsing files:\n{e}")
|
|
|
|
return instructions
|
|
|
|
|
|
def main():
|
|
args = parser.parse_args()
|
|
print(f"Called with: {args}")
|
|
# If we don't have the html folder already...
|
|
if not os.path.isdir(os.path.join(args.inputfolder, 'html')):
|
|
try:
|
|
download_asm_doc_archive(args.downloadfolder)
|
|
except IOError as e:
|
|
print("Error when downloading archive:")
|
|
print(e)
|
|
sys.exit(1)
|
|
instructions = parse_html(args.inputfolder)
|
|
instructions.sort(key=lambda b: b.opcode)
|
|
all_inst = set()
|
|
print(f"Writing {len(instructions)} instructions")
|
|
with open(args.outputpath, 'w') as f:
|
|
f.write("""
|
|
import type {AssemblyInstructionInfo} from '../../../types/assembly-docs.interfaces.js';
|
|
|
|
export function getAsmOpcode(opcode: string | undefined): AssemblyInstructionInfo | undefined {
|
|
if (!opcode) return;
|
|
switch (opcode.toUpperCase()) {
|
|
""".lstrip())
|
|
for inst in instructions:
|
|
f.write(f' case "{inst.mnemonic}":\n')
|
|
f.write(' return {}'.format(json.dumps({
|
|
"tooltip": inst.tooltip,
|
|
"html": inst.body,
|
|
"url": get_url_for_instruction(inst)
|
|
}, indent=16, separators=(',', ': '), sort_keys=True))[:-1] + ' };\n\n')
|
|
f.write("""
|
|
}
|
|
}
|
|
""")
|
|
|
|
|
|
if __name__ == '__main__':
|
|
main()
|