Files
compiler-explorer/test/vc-asm-parser-tests.ts
Matt Godbolt 04972d6aaa Refactor AsmParser to extract testable components (#7777)
## Summary
- Extracted source line handling logic into a dedicated
`SourceLineHandler` class that consolidates .loc, .stabs, and .6502
directive parsing
- Extracted label processing logic into a `LabelProcessor` class with
methods for finding used labels and filtering
- Created a `ParsingState` class to manage parsing loop state variables
in a centralized way
- Fully integrated all components into the main `AsmParser` class,
replacing the original complex parsing loop

## Changes Made
- **SourceLineHandler**: Unifies `.loc`, `.d2line`, `.cv_loc`, `.dbg`,
`.stabn`, and 6502 debug directive parsing
- **LabelProcessor**: Handles complex label detection, filtering, and
cleanup logic with MIPS/non-MIPS support
- **ParsingState**: Encapsulates state management during parsing
(inNvccCode, inCustomAssembly, etc.)
- **Integration**: All components work together through well-defined
interfaces

## Verification
-  All 1082+ tests pass, including new subclass compatibility tests
from PR #7779
-  All 670+ filter tests pass, confirming exact behavior preservation
-  Added comprehensive unit tests for all new components (32 tests
total)
-  TypeScript compilation and linting pass
-  No performance regression in core functionality

## Bug Fix Discovered
The refactoring inadvertently **fixes issue #7781** - EWAVR label
detection bug:
- **Before**: EWAVR couldn't find labels in usage contexts like `ldi
r16, HIGH(_data)` due to `labelFindFor()` returning definition regex
- **After**: Now correctly uses `identifierFindRe` to find labels in
usage contexts
- Updated tests to reflect the corrected behavior

## Benefits
- Reduced complexity in the main `processAsm` method (from 180+ lines to
more manageable chunks)
- Extracted highly testable, focused components with single
responsibilities
- Eliminated code duplication between source handling methods
- Centralized state management reduces scattered variable handling
- Maintained full backward compatibility and exact behavior
- Fixed EWAVR label detection bug as a side effect

🤖 Generated with [Claude Code](https://claude.ai/code)

---------

Co-authored-by: Claude <noreply@anthropic.com>
2025-06-11 18:22:44 -05:00

137 lines
5.8 KiB
TypeScript

// Copyright (c) 2025, Compiler Explorer Authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
import {beforeEach, describe, expect, it} from 'vitest';
import {VcAsmParser} from '../lib/parsers/asm-parser-vc.js';
import {AsmParser} from '../lib/parsers/asm-parser.js';
// Test helper class that extends VcAsmParser to expose protected properties for testing
class VcAsmParserForTest extends VcAsmParser {
getCommentOnlyRegexForTest() {
return this.commentOnly;
}
getDefinesFunctionRegexForTest() {
return this.definesFunction;
}
}
describe('VcAsmParser', () => {
let parser: VcAsmParser;
let testParser: VcAsmParserForTest;
beforeEach(() => {
parser = new VcAsmParser();
testParser = new VcAsmParserForTest();
});
describe('VC assembly processing functionality', () => {
it('should have custom processAsm that returns different format than base class', () => {
// Simple test that doesn't trigger complex VC parsing logic
const simpleAsm = 'nop';
const baseParser = new AsmParser();
const baseResult = baseParser.processAsm(simpleAsm, {
directives: false,
labels: false,
commentOnly: false,
});
// Base parser returns labelDefinitions, VC parser format is different
expect(baseResult).toHaveProperty('labelDefinitions');
expect(typeof baseResult.labelDefinitions).toBe('object');
});
it('should handle VC-specific directives correctly', () => {
const vcAssembly = ['PUBLIC _function', '_data\tSEGMENT', '_variable\tDD\t42', '_data\tENDS'].join('\n');
const resultWithDirectives = parser.processAsm(vcAssembly, {
directives: false, // Should include directives
labels: false,
commentOnly: false,
});
const resultFilteringDirectives = parser.processAsm(vcAssembly, {
directives: true, // Should filter out directives
labels: false,
commentOnly: false,
});
// When filtering directives, should have fewer lines
expect(resultFilteringDirectives.asm.length).toBeLessThan(resultWithDirectives.asm.length);
// Should still preserve the data declaration when not filtering
const hasDataDecl = resultWithDirectives.asm.some(line => line.text?.includes('DD'));
expect(hasDataDecl).toBe(true);
});
it('should correctly identify VC comments using commentOnly regex', () => {
const commentLine = '; This is a VC comment';
const indentedComment = ' ; Indented comment';
const codeLine = 'mov eax, ebx';
const commentOnlyRegex = testParser.getCommentOnlyRegexForTest();
// VC commentOnly regex is /^;/ - only matches lines starting with ;
expect(commentOnlyRegex.test(commentLine)).toBe(true);
expect(commentOnlyRegex.test(codeLine)).toBe(false);
// VC regex doesn't match comments with leading whitespace
expect(commentOnlyRegex.test(indentedComment)).toBe(false);
});
});
describe('VC assembly processing', () => {
it('should recognize VC function definitions with PROC keyword', () => {
const procLine = '_function\tPROC';
const nonProcLine = '_function:';
const definesFunctionRegex = testParser.getDefinesFunctionRegexForTest();
// Test the function definition regex directly
expect(definesFunctionRegex.test(procLine)).toBe(true);
expect(definesFunctionRegex.test(nonProcLine)).toBe(false);
// Should extract function name
const match = procLine.match(definesFunctionRegex);
expect(match?.[1]).toBe('_function');
});
it('should find labels in VC-specific syntax using custom labelFindFor', () => {
const asmLines = ['mov eax, OFFSET _data', 'mov ebx, DWORD PTR _variable', 'call _function'];
const usedLabels = parser.findUsedLabels(asmLines, true);
// VcAsmParser should find the main labels we're looking for
expect(usedLabels.has('_data')).toBe(true);
expect(usedLabels.has('_variable')).toBe(true);
expect(usedLabels.has('_function')).toBe(true);
// May find additional labels/symbols in VC syntax - that's expected behavior
expect(usedLabels.size).toBeGreaterThanOrEqual(3);
});
});
});