Files
compiler-explorer/lib/compilers/nvcc.ts
Matt Godbolt (bot acct) 20505fba5a Filter NVCC fatbinData label from assembly output (#8506)
*(I'm Molty, an AI assistant acting on behalf of @mattgodbolt)*

Closes #5178

## Problem

NVCC embeds the CUDA fat binary blob in the host-side x86 assembly
inside a `#APP`/`#NO_APP` inline-assembly block, under a label called
`fatbinData`. In a realistic kernel this can be 100+ lines of `.quad`
hex values — a wall of noise before any user-readable code.

Example: https://godbolt.org/z/W3YMcq8oY

## Fix

Per-compiler pre-processing step in `NvccCompiler.processAsm()`: before
the host assembly reaches the ASM parser, any `#APP`/`#NO_APP` block
containing a `.nv_fatbin` section is stripped out entirely.

- Only `.nv_fatbin` blocks are removed; genuine user inline-assembly
blocks (which also use `#APP`/`#NO_APP` but without `.nv_fatbin`) are
left intact.
- Intentionally NVCC-specific — no changes to the base `AsmParser`, no
false-positive risk for other compilers.
- Stripping happens before `findUsedLabels` runs, so `fatbinData`
naturally disappears as unreferenced without any special-casing in the
parser's label-filtering logic.
- Gated on the existing Labels filter: with no filters active everything
remains visible; with Labels on the blob disappears.

## Testing

**New compiler unit tests** (`test/compilers/nvcc-tests.ts`):
- Strips `#APP`/`#NO_APP` blocks containing `.nv_fatbin`
- Preserves `#APP`/`#NO_APP` blocks without `.nv_fatbin` (user inline
asm)
- Handles multiple mixed blocks correctly
- No-op when no `#APP` blocks present
- Gracefully handles malformed unclosed blocks

**New parser filter-case**
(`test/filters-cases/nvcc-x86-host-example.asm`): representative NVCC
12.0 host assembly (real 15-line fat binary, boilerplate functions,
`.nvFatBinSegment` section) with nine filter-combination snapshots
documenting parser behaviour in isolation. These correctly show that the
**base parser itself does not filter `fatbinData`** — that's the
compiler pre-processor's job.

All 767 tests pass.

Co-authored-by: mattgodbolt-molty <mattgodbolt-molty@users.noreply.github.com>
2026-02-22 19:46:36 -06:00

238 lines
10 KiB
TypeScript

// Copyright (c) 2018, Compiler Explorer Authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
import * as fs from 'node:fs/promises';
import Path from 'node:path';
import Semver from 'semver';
import _ from 'underscore';
import type {CompilationInfo, CompilationResult} from '../../types/compilation/compilation.interfaces.js';
import type {PreliminaryCompilerInfo} from '../../types/compiler.interfaces.js';
import type {ParseFiltersAndOutputOptions} from '../../types/features/filters.interfaces.js';
import {unwrap} from '../assert.js';
import {BaseCompiler} from '../base-compiler.js';
import {CompilationEnvironment} from '../compilation-env.js';
import {PTXAsmParser} from '../parsers/asm-parser-ptx.js';
import {SassAsmParser} from '../parsers/asm-parser-sass.js';
import {asSafeVer} from '../utils.js';
import {ClangParser} from './argument-parsers.js';
/**
 * Compiler driver for NVIDIA's `nvcc`.
 *
 * On top of the base compiler behaviour this class:
 *  - adds the nvcc flags needed to keep intermediate device files next to the output,
 *  - hides the embedded fat-binary blob from the host assembly when the Labels filter is on,
 *  - collects the kept `.ptx` / `.cubin` files as per-device assembly views.
 */
export class NvccCompiler extends BaseCompiler {
    static get key() {
        return 'nvcc';
    }

    /** Parser applied to disassembled `.cubin` (SASS) device output. */
    deviceAsmParser: SassAsmParser;
    /** Parser applied to `.ptx` device output. */
    ptxParser: PTXAsmParser;

    constructor(info: PreliminaryCompilerInfo, env: CompilationEnvironment) {
        super(info, env);
        this.compiler.supportsOptOutput = true;
        this.compiler.supportsDeviceAsmView = true;
        this.deviceAsmParser = new SassAsmParser(this.compilerProps);
        this.ptxParser = new PTXAsmParser(this.compilerProps);
    }

    // TODO: (for all of CUDA)
    // * lots of whitespace from nvcc
    // * would be nice to try and filter unused `.func`s from e.g. clang output

    /**
     * Build the fixed part of the nvcc command line.
     *
     * When not executing, the compile stops at `-c` and intermediate files are kept in the
     * output file's directory (that is where `extractDeviceCode` later looks for them). When
     * additionally not producing a binary, `-S` is forwarded through `-Xcompiler`.
     *
     * @param filters - active output filters; only `execute` and `binary` are consulted
     * @param outputFilename - path of the primary output file
     * @param userOptions - unused here; present to match the base-class signature
     * @returns the option list to pass to nvcc
     */
    override optionsForFilter(filters: ParseFiltersAndOutputOptions, outputFilename: string, userOptions?: string[]) {
        const opts = ['-o', this.filename(outputFilename), '-g', '-lineinfo', '--keep-device-functions'];
        if (!filters.execute) {
            opts.push('-c', '-keep', '-keep-dir', Path.dirname(outputFilename));
            if (!filters.binary) {
                opts.push('-Xcompiler=-S');
            }
        }
        return opts;
    }

    override getArgumentParserClass() {
        return ClangParser;
    }

    /**
     * Optimisation output is requested either by whatever the base class recognises or by
     * one of the nvcc spellings `--optimization-info` / `-opt-info`.
     */
    override optOutputRequested(options: string[]) {
        return (
            super.optOutputRequested(options) ||
            options.includes('--optimization-info') ||
            options.includes('-opt-info')
        );
    }

    /**
     * Disassemble a device binary with the configured `nvdisasm` executable.
     *
     * @param outputFilename - file to disassemble
     * @param result - object to fill in; its `dirPath` is used as the working directory
     * @param maxOutput - output size limit handed to `exec`
     * @returns the same `result` object, with `asm` set to the processed disassembly on
     *          success (and `objdumpTime` recorded) or to a `<No output: ...>` placeholder
     *          when nvdisasm exits with a non-zero code
     */
    async nvdisasm(outputFilename: string, result: any, maxOutput: number) {
        const {nvdisasm, semver} = this.compiler;
        // `-hex` is only passed for versions 11.0.0 and later.
        const args = Semver.lt(asSafeVer(semver), '11.0.0', true)
            ? [outputFilename, '-c', '-g']
            : [outputFilename, '-c', '-g', '-hex'];
        const {code, execTime, stdout} = await this.exec(unwrap(nvdisasm), args, {
            maxOutput,
            customCwd: result.dirPath,
        });
        if (code === 0) {
            result.objdumpTime = execTime;
            result.asm = this.postProcessObjdumpOutput(stdout);
        } else {
            result.asm = `<No output: ${Path.basename(unwrap(nvdisasm))} returned ${code}>`;
        }
        return result;
    }

    /**
     * Load the host-side assembly (and optimisation output, if any) after compilation.
     *
     * In binary mode the output is run through `objdump`. Otherwise the assembly file is
     * read from disk, unless it is missing or at least `max-asm-size` bytes, in which case a
     * placeholder message is stored instead. Configured post-process commands, when present,
     * take the place of the plain file read.
     *
     * @returns a promise of `[result, optOutput, []]`
     */
    override async postProcess(result, outputFilename: string, filters: ParseFiltersAndOutputOptions) {
        const maxSize = this.env.ceProps('max-asm-size', 64 * 1024 * 1024);
        const optPromise = result.optPath ? this.processOptOutput(result.optPath) : Promise.resolve([]);
        const postProcess = _.compact(this.compiler.postProcess);
        const asmPromise = (
            filters.binary
                ? this.objdump(outputFilename, {}, maxSize, !!filters.intel, !!filters.demangle, false, false, filters)
                : (async () => {
                      if (result.asmSize === undefined) {
                          result.asm = '<No output file>';
                          return result;
                      }
                      if (result.asmSize >= maxSize) {
                          result.asm =
                              '<No output: generated assembly was too large' +
                              ` (${result.asmSize} > ${maxSize} bytes)>`;
                          return result;
                      }
                      if (postProcess.length > 0) {
                          return await this.execPostProcess(result, postProcess, outputFilename, maxSize);
                      }
                      const contents = await fs.readFile(outputFilename, {encoding: 'utf8'});
                      result.asm = contents.toString();
                      return result;
                  })()
        ).then(asm => {
            // Either branch may hand back a bare string or an object carrying `asm`.
            result.asm = typeof asm === 'string' ? asm : asm.asm;
            return result;
        });
        return Promise.all([asmPromise, optPromise, []]);
    }

    // Matches the start/end of a GAS inline-assembly block emitted by the host compiler.
    private static readonly appBlockStartRe = /^#APP\b/;
    private static readonly appBlockEndRe = /^#NO_APP\b/;
    // Matches the .nv_fatbin section directive that NVCC injects to hold the fat binary blob.
    private static readonly nvFatBinSectionRe = /^\s*\.section\s+\.nv_fatbin\b/;

    /**
     * Strip `#APP`/`#NO_APP` inline-assembly blocks that contain a `.nv_fatbin`
     * section from the host-side x86 assembly. These blocks hold the raw CUDA
     * fat binary blob (the `fatbinData` label followed by hundreds of `.quad`
     * hex lines) which is never useful to inspect in the asm view.
     *
     * Only complete blocks that contain `.nv_fatbin` are removed; any `#APP`/`#NO_APP`
     * blocks originating from genuine user inline-assembly are left intact.
     *
     * Malformed input never loses lines:
     *  - a block still open at end of input is kept,
     *  - a block still open when another `#APP` appears is kept and a new block starts,
     *  - a `#NO_APP` with no matching `#APP` is passed through unchanged.
     *
     * @param asm - host assembly text, lines separated by `\n`
     * @returns the assembly with fat-binary blocks removed
     */
    protected removeNvccFatbinaryBlob(asm: string): string {
        const kept: string[] = [];
        // Lines of the block currently being buffered (starting with its `#APP` line),
        // or null while outside any block.
        let pending: string[] | null = null;
        let hasFatBin = false;

        for (const line of asm.split('\n')) {
            const trimmed = line.trim();
            if (NvccCompiler.appBlockStartRe.test(trimmed)) {
                // The previous block was never closed: emit what was buffered rather than
                // silently discarding it, then start buffering the new block.
                if (pending !== null) {
                    kept.push(...pending);
                }
                pending = [line];
                hasFatBin = false;
            } else if (pending === null) {
                // Outside any block; this also covers a stray `#NO_APP`.
                kept.push(line);
            } else if (NvccCompiler.appBlockEndRe.test(trimmed)) {
                if (!hasFatBin) {
                    // Not a fat-binary block — keep it
                    kept.push(...pending, line);
                }
                pending = null;
                hasFatBin = false;
            } else {
                if (NvccCompiler.nvFatBinSectionRe.test(line)) {
                    hasFatBin = true;
                }
                pending.push(line);
            }
        }

        // Handle (malformed) unclosed #APP block: keep it
        if (pending !== null) {
            kept.push(...pending);
        }
        return kept.join('\n');
    }

    /**
     * Remove the fat-binary blob before handing the assembly to the base implementation.
     * The stripping only happens when the Labels filter is on and `result.asm` is still a
     * plain string; the incoming `result` object is copied rather than mutated.
     */
    override async processAsm(result, filters: ParseFiltersAndOutputOptions, options: string[]) {
        if (filters.labels && typeof result.asm === 'string') {
            result = {...result, asm: this.removeNvccFatbinaryBlob(result.asm)};
        }
        return super.processAsm(result, filters, options);
    }

    /**
     * Collect device code from the files kept in `result.dirPath`.
     *
     * Every `.ptx` file is read as text and parsed with the PTX parser; every `.cubin` file
     * is disassembled with `nvdisasm` and parsed with the SASS parser. Each entry is stored
     * in `result.devices` under a label of the form `PTX (...)` / `SASS (...)`, where the
     * parenthesised part is built from the dot-separated components of the file name between
     * its first component and its extension. When `dirPath` is not set, `result` is returned
     * unchanged.
     */
    override async extractDeviceCode(
        result: CompilationResult,
        filters: ParseFiltersAndOutputOptions,
        compilationInfo: CompilationInfo,
    ) {
        const {dirPath} = result;
        const {demangle} = filters;
        const devices = {...result.devices};
        if (dirPath) {
            const files = await fs.readdir(dirPath);
            const maxSize = this.env.ceProps('max-asm-size', 64 * 1024 * 1024);
            await Promise.all(
                files
                    .filter(f => f.endsWith('.ptx') || f.endsWith('.cubin'))
                    .map(async name => {
                        const type = name.endsWith('.ptx') ? 'PTX' : 'SASS';
                        const {asm} =
                            type === 'PTX'
                                ? {asm: await fs.readFile(Path.join(dirPath, name), 'utf8')}
                                : await this.nvdisasm(Path.join(dirPath, name), {dirPath}, maxSize);
                        // Drop the first component and the extension; what remains is shown as the device tag.
                        const archAndCode = name.split('.').slice(1, -1).join(', ') || '';
                        const nameAndArch = type + (archAndCode ? ` (${archAndCode.toLowerCase()})` : '');
                        const parser = type === 'PTX' ? this.ptxParser : this.deviceAsmParser;
                        Object.assign(devices, {
                            [nameAndArch]: await this.postProcessAsm(
                                {
                                    okToCache: demangle,
                                    // SASS comes from a disassembler, so it is parsed in binary mode.
                                    ...parser.process(asm, {...filters, binary: type === 'SASS'}),
                                },
                                {...filters, binary: type === 'SASS'},
                            ),
                        });
                    }),
            );
            result.devices = devices;
        }
        return result;
    }
}