diff --git a/.eslintignore b/.eslintignore index 43a2571e7..f9ab09a4b 100644 --- a/.eslintignore +++ b/.eslintignore @@ -8,5 +8,5 @@ views cypress # Autogenerated files -lib/asm-docs/generated/asm-docs-*.js +lib/asm-docs/generated/asm-docs-* etc/scripts/docenizer/vendor/jvms.html diff --git a/.gitattributes b/.gitattributes index cc5a86526..b1e215510 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,5 +1,5 @@ docs/* linguist-documentation *.s linguist-generated *.asm linguist-generated -lib/asm-docs/generated/asm-docs-*.js linguist-generated +lib/asm-docs/generated/asm-docs-* linguist-generated test/*-cases/* linguist-generated diff --git a/.nycrc.yml b/.nycrc.yml index d93f5ce0f..8a6923e23 100644 --- a/.nycrc.yml +++ b/.nycrc.yml @@ -3,7 +3,7 @@ include: - lib/**/*.js - lib/**/*.ts exclude: - - lib/asm-docs/generated/asm-docs-*.js + - lib/asm-docs/generated/asm-docs-* - lib/compilers/fake-for-test.js - lib/**/*.d.ts report-dir: ./out/coverage diff --git a/.prettierignore b/.prettierignore index f4aa01431..964f22e9e 100644 --- a/.prettierignore +++ b/.prettierignore @@ -38,7 +38,7 @@ examples out # Autogenerated files -lib/asm-docs/generated/asm-docs-*.js +lib/asm-docs/generated/asm-docs-* ######################### diff --git a/etc/scripts/docenizers/.gitignore b/etc/scripts/docenizers/.gitignore new file mode 100644 index 000000000..e3b3bcd62 --- /dev/null +++ b/etc/scripts/docenizers/.gitignore @@ -0,0 +1,5 @@ +asm-docs +asm-docs-arm +evm-inst-docs +python-inst-docs +vendor/*.html diff --git a/etc/scripts/docenizers/docenizer-6502.py b/etc/scripts/docenizers/docenizer-6502.py index 97e4344cc..7b484edb3 100755 --- a/etc/scripts/docenizers/docenizer-6502.py +++ b/etc/scripts/docenizers/docenizer-6502.py @@ -7,7 +7,7 @@ import re import urllib.request -DOC_URL_BASE = "https://raw.githubusercontent.com/mist64/c64ref/master/6502/" +DOC_URL_BASE = "https://raw.githubusercontent.com/mist64/c64ref/4274bd8782c5d3b18c68e6b9479b0ec751eb96b1/Source/6502/" doc_files = 
{f"{DOC_URL_BASE}{filename}":cpu_type for filename, cpu_type in { "cpu_6502.txt" : "6502", "cpu_65c02.txt" : "65c02", @@ -39,7 +39,7 @@ class Instruction: html = "" for desc_line in self.description: html += f"
{escape_quotes(desc_line)}
" - return html + return html elif self.long_name: return f"{escape_quotes(self.long_name)}
" elif self.name: @@ -135,7 +135,9 @@ def parse_descriptions(line, line_num, cpu_type, instructions): def write_script(filename, instructions): - script = ["export function getAsmOpcode(opcode) {", + script = ["import {AssemblyInstructionInfo} from '../base';", + "", + "export function getAsmOpcode(opcode: string | undefined): AssemblyInstructionInfo | undefined {", " if (!opcode) return;", " switch (opcode.toUpperCase()) {"] for inst in instructions.values(): @@ -176,7 +178,7 @@ def escape_quotes(string): def get_arguments(): parser = argparse.ArgumentParser() help_text = "the location to which the script will be written" - relative_path = "/../../../lib/handlers/asm-docs-6502.js" + relative_path = "../../../../lib/asm-docs/generated/asm-docs-6502.ts" script_path = os.path.realpath(__file__) script_dir = os.path.dirname(script_path) default_path = os.path.normpath(script_dir + relative_path) diff --git a/etc/scripts/docenizers/docenizer-amd64.py b/etc/scripts/docenizers/docenizer-amd64.py index 91c78f376..0e5ea96e0 100755 --- a/etc/scripts/docenizers/docenizer-amd64.py +++ b/etc/scripts/docenizers/docenizer-amd64.py @@ -19,8 +19,8 @@ parser = argparse.ArgumentParser(description='Docenizes HTML version of the offi parser.add_argument('-i', '--inputfolder', type=str, help='Folder where the input files reside as .html. Default is ./asm-docs/', default='asm-docs') -parser.add_argument('-o', '--outputpath', type=str, help='Final path of the .js file. Default is ./asm-docs.js', - default='./asm-docs.js') +parser.add_argument('-o', '--outputpath', type=str, help='Final path of the .ts file. 
Default is ./asm-docs-amd64.ts', + default='./asm-docs-amd64.ts') parser.add_argument('-d', '--downloadfolder', type=str, help='Folder where the archive will be downloaded and extracted', default='asm-docs') @@ -354,10 +354,12 @@ def main(): print(f"Writing {len(instructions)} instructions") with open(args.outputpath, 'w') as f: f.write(""" -export function getAsmOpcode(opcode) { +import {AssemblyInstructionInfo} from '../base'; + +export function getAsmOpcode(opcode: string | undefined): AssemblyInstructionInfo | undefined { if (!opcode) return; switch (opcode.toUpperCase()) { -""") +""".lstrip()) for inst in instructions: for name in sorted(inst.names): f.write(f' case "{name}":\n') diff --git a/etc/scripts/docenizers/docenizer-arm.py b/etc/scripts/docenizers/docenizer-arm32.py similarity index 96% rename from etc/scripts/docenizers/docenizer-arm.py rename to etc/scripts/docenizers/docenizer-arm32.py index 719b8bcab..7dccdcea9 100755 --- a/etc/scripts/docenizers/docenizer-arm.py +++ b/etc/scripts/docenizers/docenizer-arm32.py @@ -19,8 +19,8 @@ parser = argparse.ArgumentParser(description='Docenizes XML version of the offic parser.add_argument('-i', '--inputfolder', type=str, help='Folder where the input files reside as .xml. Default is ./asm-docs-arm/', default='asm-docs-arm') -parser.add_argument('-o', '--outputpath', type=str, help='Final path of the .js file. Default is ./asm-docs-arm.js', - default='./asm-docs-arm.js') +parser.add_argument('-o', '--outputpath', type=str, help='Final path of the .ts file. 
Default is ./asm-docs-arm32.ts', + default='./asm-docs-arm32.ts') parser.add_argument('-d', '--downloadfolder', type=str, help='Folder where the archive will be downloaded and extracted', default='asm-docs-arm') @@ -178,10 +178,13 @@ def docenizer(): sys.exit(3) print("Writing {} instructions".format(len(instructions))) with open(args.outputpath, 'w') as f: - f.write("""export function getAsmOpcode(opcode) { + f.write(""" +import {AssemblyInstructionInfo} from '../base'; + +export function getAsmOpcode(opcode: string | undefined): AssemblyInstructionInfo | undefined { if (!opcode) return; switch (opcode) { -""") +""".lstrip()) for inst in instructions: for name in sorted(inst.names): f.write(' case "{}":\n'.format(name)) diff --git a/etc/scripts/docenizers/docenizer-avr.py b/etc/scripts/docenizers/docenizer-avr.py index 3f0640004..355ee6848 100755 --- a/etc/scripts/docenizers/docenizer-avr.py +++ b/etc/scripts/docenizers/docenizer-avr.py @@ -36,7 +36,7 @@ def main(): def get_arguments(): parser = argparse.ArgumentParser() help_text = "the location to which the script will be written" - relative_path = "/../../../lib/handlers/asm-docs-avr.js" + relative_path = "../../../../lib/asm-docs/generated/asm-docs-avr.ts" script_path = os.path.realpath(__file__) script_dir = os.path.dirname(script_path) default_path = os.path.normpath(script_dir + relative_path) @@ -89,7 +89,9 @@ def process_description(desc): def write_script(filename, instructions): log_message(f"writing to {filename}...") with open(filename, "w") as script: - script.write("export function getAsmOpcode(opcode) {\n") + script.write("import {AssemblyInstructionInfo} from '../base';\n") + script.write("\n") + script.write("export function getAsmOpcode(opcode: string | undefined): AssemblyInstructionInfo | undefined {\n") script.write(" if (!opcode) return;\n") script.write(" switch (opcode.toUpperCase()) {\n") for inst in instructions.values(): diff --git a/etc/scripts/docenizers/docenizer-evm.py 
b/etc/scripts/docenizers/docenizer-evm.py index e2e3bd355..b647ac07b 100755 --- a/etc/scripts/docenizers/docenizer-evm.py +++ b/etc/scripts/docenizers/docenizer-evm.py @@ -13,8 +13,8 @@ parser = argparse.ArgumentParser(description='Docenizes the EVM documentation') parser.add_argument('-i', '--inputfolder', type=str, help='Folder where the input files reside as .html. Default is ./evm-inst-docs/', default='evm-inst-docs') -parser.add_argument('-o', '--outputpath', type=str, help='Final path of the .js file. Default is ./evm-inst-docs.js', - default='./evm-inst-docs.js') +parser.add_argument('-o', '--outputpath', type=str, help='Final path of the .ts file. Default is ./asm-docs-evm.ts', + default='./asm-docs-evm.ts') parser.add_argument('-d', '--downloadfolder', type=str, help='Folder where the archive will be downloaded and extracted', default='evm-inst-docs') @@ -126,10 +126,12 @@ def main(): print(f"Writing {len(instructions)} instructions") with open(args.outputpath, 'w') as f: f.write(""" -export function getAsmOpcode(opcode) { +import {AssemblyInstructionInfo} from '../base'; + +export function getAsmOpcode(opcode: string | undefined): AssemblyInstructionInfo | undefined { if (!opcode) return; switch (opcode.toUpperCase()) { -""") +""".lstrip()) for inst in instructions: f.write(f' case "{inst.mnemonic}":\n') f.write(' return {}'.format(json.dumps({ diff --git a/etc/scripts/docenizers/docenizer-java.js b/etc/scripts/docenizers/docenizer-java.js index 22223f1da..43541e4be 100644 --- a/etc/scripts/docenizers/docenizer-java.js +++ b/etc/scripts/docenizers/docenizer-java.js @@ -89,7 +89,9 @@ const main = async () => { .slice(1) // Drop 1 because the first is the "mne monic" .map(it => extract($(it), $)) .flat(); - console.log('export function getAsmOpcode(opcode) {'); + console.log('import {AssemblyInstructionInfo} from \'../base\';'); + console.log(''); + console.log('export function getAsmOpcode(opcode: string | undefined): AssemblyInstructionInfo | undefined 
{'); console.log(' if (!opcode) return;'); console.log(' switch (opcode.toUpperCase()) {'); for (const instruction of instructions) { diff --git a/etc/scripts/docenizers/docenizer-java.sh b/etc/scripts/docenizers/docenizer-java.sh index 889960a48..3f4ed63c1 100755 --- a/etc/scripts/docenizers/docenizer-java.sh +++ b/etc/scripts/docenizers/docenizer-java.sh @@ -5,4 +5,4 @@ JVMS_PATH=$(pwd)/vendor/jvms.html [ -f "$JVMS_PATH" ] || curl https://docs.oracle.com/javase/specs/jvms/se18/html/jvms-6.html -o "$JVMS_PATH" $(pwd)/../find-node ../../../.node-bin -$(cat ../../../.node-bin) docenizer-java.js > ../../../lib/asm-docs/generated/asm-docs-java.js +$(cat ../../../.node-bin) docenizer-java.js > ../../../lib/asm-docs/generated/asm-docs-java.ts diff --git a/etc/scripts/docenizers/docenizer-llvm.sh b/etc/scripts/docenizers/docenizer-llvm.sh index ce00b5bda..6c6241dae 100755 --- a/etc/scripts/docenizers/docenizer-llvm.sh +++ b/etc/scripts/docenizers/docenizer-llvm.sh @@ -4,4 +4,4 @@ LANGREF_PATH=$(pwd)/vendor/LangRef.html [ -f "$LANGREF_PATH" ] || curl https://llvm.org/docs/LangRef.html -o "$LANGREF_PATH" -../../../node_modules/.bin/ts-node-esm docenizer-llvm.ts > ../../../lib/asm-docs/generated/asm-docs-llvm.js +../../../node_modules/.bin/ts-node-esm docenizer-llvm.ts > ../../../lib/asm-docs/generated/asm-docs-llvm.ts diff --git a/etc/scripts/docenizers/docenizer-llvm.ts b/etc/scripts/docenizers/docenizer-llvm.ts index 77e96820c..10275e2f4 100644 --- a/etc/scripts/docenizers/docenizer-llvm.ts +++ b/etc/scripts/docenizers/docenizer-llvm.ts @@ -49,7 +49,9 @@ const $ = cheerio.load(contents); const names = getInstructionList($.root(), $); const info = names.map((x) => getInstructionInfo(x, $.root(), $)); -console.log('export function getAsmOpcode(opcode) {'); +console.log('import {AssemblyInstructionInfo} from \'../base\';'); +console.log(''); +console.log('export function getAsmOpcode(opcode: string | undefined): AssemblyInstructionInfo | undefined {'); console.log(' if 
(!opcode) return;'); console.log(' switch (opcode.toUpperCase()) {'); @@ -57,8 +59,8 @@ for (const instruction of info) { console.log(` case '${instruction.name.toUpperCase()}':`); console.log(' return {'); console.log(` url: \`${instruction.url}\`,`); - console.log(` html: \`${instruction.html.replace('\n', '')}\`,`); - console.log(` tooltip: \`${instruction.tooltip.replace('\n', '')}\`,`); + console.log(` html: \`${instruction.html.replaceAll('\n', '').replaceAll('`', '\\`')}\`,`); + console.log(` tooltip: \`${instruction.tooltip.replaceAll('\n', '').replaceAll('`', '\\`')}\`,`); console.log(' };'); } diff --git a/etc/scripts/docenizers/docenizer-python.py b/etc/scripts/docenizers/docenizer-python.py index 2f72ffd52..71c21e10d 100755 --- a/etc/scripts/docenizers/docenizer-python.py +++ b/etc/scripts/docenizers/docenizer-python.py @@ -18,8 +18,8 @@ parser = argparse.ArgumentParser(description='Docenizes HTML version of the offi parser.add_argument('-i', '--inputfolder', type=str, help='Folder where the input files reside as .html. Default is ./python-inst-docs/', default='python-inst-docs') -parser.add_argument('-o', '--outputpath', type=str, help='Final path of the .js file. Default is ./python-inst-docs.js', - default='./python-inst-docs.js') +parser.add_argument('-o', '--outputpath', type=str, help='Final path of the .ts file. 
Default is ./asm-docs-python.ts', + default='./asm-docs-python.ts') parser.add_argument('-d', '--downloadfolder', type=str, help='Folder where the archive will be downloaded and extracted', default='python-inst-docs') @@ -114,10 +114,12 @@ def main(): print(f"Writing {len(instructions)} instructions") with open(args.outputpath, 'w') as f: f.write(""" -export function getAsmOpcode(opcode) { +import {AssemblyInstructionInfo} from '../base'; + +export function getAsmOpcode(opcode: string | undefined): AssemblyInstructionInfo | undefined { if (!opcode) return; switch (opcode.toUpperCase()) { -""") +""".lstrip()) for inst in instructions: for name in sorted(inst.names): f.write(f' case "{name}":\n') diff --git a/etc/scripts/docenizers/tsconfig.json b/etc/scripts/docenizers/tsconfig.json new file mode 100644 index 000000000..738170486 --- /dev/null +++ b/etc/scripts/docenizers/tsconfig.json @@ -0,0 +1,6 @@ +{ + "extends": "../../../tsconfig.json", + "compilerOptions": { + "lib": ["es2021"], + } +} diff --git a/lib/asm-docs/generated/asm-docs-6502.js b/lib/asm-docs/generated/asm-docs-6502.ts similarity index 98% rename from lib/asm-docs/generated/asm-docs-6502.js rename to lib/asm-docs/generated/asm-docs-6502.ts index 71865475c..82992f266 100644 --- a/lib/asm-docs/generated/asm-docs-6502.js +++ b/lib/asm-docs/generated/asm-docs-6502.ts @@ -1,4 +1,6 @@ -export function getAsmOpcode(opcode) { +import {AssemblyInstructionInfo} from '../base'; + +export function getAsmOpcode(opcode: string | undefined): AssemblyInstructionInfo | undefined { if (!opcode) return; switch (opcode.toUpperCase()) { case "ADC": @@ -17,7 +19,7 @@ export function getAsmOpcode(opcode) { case "ASL": return { - "html": "The shift left instruction shifts either the accumulator or the address memory location 1 bit to the left, with the bit 0 always being set to 0 and the bit 7 output always being contained in the carry flag. 
ASL either shifts the accumulator left 1 bit or is a read/modify/write instruction that affects only memory.
The instruction does not affect the overflow bit, sets N equal to the result bit 7 (bit 6 in the input), sets Z flag if the result is equal to 0, otherwise resets Z and stores the input bit 7 in the carry flag.
", + "html": "The shift left instruction shifts either the accumulator or the address memory location 1 bit to the left, with the bit 0 always being set to 0 and the the input bit 7 being stored in the carry flag. ASL either shifts the accumulator left 1 bit or is a read/modify/write instruction that affects only memory.
The instruction does not affect the overflow bit, sets N equal to the result bit 7 (bit 6 in the input), sets Z flag if the result is equal to 0, otherwise resets Z and stores the input bit 7 in the carry flag.
", "tooltip": "Arithmetic Shift Left", "url": "https://www.pagetable.com/c64ref/6502/?cpu=6502&tab=2#ASL", }; diff --git a/lib/asm-docs/generated/asm-docs-amd64.js b/lib/asm-docs/generated/asm-docs-amd64.ts similarity index 92% rename from lib/asm-docs/generated/asm-docs-amd64.js rename to lib/asm-docs/generated/asm-docs-amd64.ts index 2ca3545f3..9bbb3e13d 100644 --- a/lib/asm-docs/generated/asm-docs-amd64.js +++ b/lib/asm-docs/generated/asm-docs-amd64.ts @@ -1,5 +1,6 @@ +import {AssemblyInstructionInfo} from '../base'; -export function getAsmOpcode(opcode) { +export function getAsmOpcode(opcode: string | undefined): AssemblyInstructionInfo | undefined { if (!opcode) return; switch (opcode.toUpperCase()) { case "AAA": @@ -46,7 +47,7 @@ export function getAsmOpcode(opcode) { case "ADD": return { - "html": "Adds the destination operand (first operand) and the source operand (second operand) and then stores the result in the destination operand. The destination operand can be a register or a memory location; the source operand can be an immediate, a register, or a memory location. (However, two memory operands cannot be used in one instruction.) When an immediate value is used as an operand, it is sign-extended to the length of the destination operand format.
The ADD instruction performs integer addition. It evaluates the result for both signed and unsigned integer operands and sets the CF and OF flags to indicate a carry (overflow) in the signed or unsigned result, respectively. The SF flag indicates the sign of the signed result.
This instruction can be used with a LOCK prefix to allow the instruction to be executed atomically.
In 64-bit mode, the instruction\u2019s default operation size is 32 bits. Using a REX prefix in the form of REX.R permits access to additional registers (R8-R15). Using a REX prefix in the form of REX.W promotes operation to 64 bits. See the summary chart at the beginning of this section for encoding data and limits.
", + "html": "Adds the destination operand (first operand) and the source operand (second operand) and then stores the result in the destination operand. The destination operand can be a register or a memory location; the source operand can be an immediate, a register, or a memory location. (However, two memory operands cannot be used in one instruction.) When an immediate value is used as an operand, it is sign-extended to the length of the destination operand format.
The ADD instruction performs integer addition. It evaluates the result for both signed and unsigned integer operands and sets the OF and CF flags to indicate a carry (overflow) in the signed or unsigned result, respectively. The SF flag indicates the sign of the signed result.
This instruction can be used with a LOCK prefix to allow the instruction to be executed atomically.
In 64-bit mode, the instruction\u2019s default operation size is 32 bits. Using a REX prefix in the form of REX.R permits access to additional registers (R8-R15). Using a REX prefix in the form of REX.W promotes operation to 64 bits. See the summary chart at the beginning of this section for encoding data and limits.
", "tooltip": "Adds the destination operand (first operand) and the source operand (second operand) and then stores the result in the destination operand. The destination operand can be a register or a memory location; the source operand can be an immediate, a register, or a memory location. (However, two memory operands cannot be used in one instruction.) When an immediate value is used as an operand, it is sign-extended to the length of the destination operand format.", "url": "http://www.felixcloutier.com/x86/ADD.html" }; @@ -54,16 +55,16 @@ export function getAsmOpcode(opcode) { case "ADDPD": case "VADDPD": return { - "html": "Add two, four or eight packed double-precision floating-point values from the first source operand to the second source operand, and stores the packed double-precision floating-point results in the destination operand.
EVEX encoded versions: The first source operand is a ZMM/YMM/XMM register. The second source operand can be a ZMM/YMM/XMM register, a 512/256/128-bit memory location or a 512/256/128-bit vector broadcasted from a 64-bit memory location. The destination operand is a ZMM/YMM/XMM register conditionally updated with writemask k1.
VEX.256 encoded version: The first source operand is a YMM register. The second source operand can be a YMM register or a 256-bit memory location. The destination operand is a YMM register. The upper bits (MAXVL-1:256) of the corresponding ZMM register destination are zeroed.
VEX.128 encoded version: the first source operand is a XMM register. The second source operand is an XMM register or 128-bit memory location. The destination operand is an XMM register. The upper bits (MAXVL-1:128) of the corresponding ZMM register destination are zeroed.
128-bit Legacy SSE version: The second source can be an XMM register or an 128-bit memory location. The destination is not distinct from the first source XMM register and the upper Bits (MAXVL-1:128) of the corresponding ZMM register destination are unmodified.
", - "tooltip": "Add two, four or eight packed double-precision floating-point values from the first source operand to the second source operand, and stores the packed double-precision floating-point results in the destination operand.", + "html": "Adds two, four or eight packed double-precision floating-point values from the first source operand to the second source operand, and stores the packed double-precision floating-point result in the destination operand.
EVEX encoded versions: The first source operand is a ZMM/YMM/XMM register. The second source operand can be a ZMM/YMM/XMM register, a 512/256/128-bit memory location or a 512/256/128-bit vector broadcasted from a 64-bit memory location. The destination operand is a ZMM/YMM/XMM register conditionally updated with writemask k1.
VEX.256 encoded version: The first source operand is a YMM register. The second source operand can be a YMM register or a 256-bit memory location. The destination operand is a YMM register. The upper bits (MAXVL-1:256) of the corresponding ZMM register destination are zeroed.
VEX.128 encoded version: the first source operand is a XMM register. The second source operand is an XMM register or 128-bit memory location. The destination operand is an XMM register. The upper bits (MAXVL-1:128) of the corresponding ZMM register destination are zeroed.
128-bit Legacy SSE version: The second source can be an XMM register or an 128-bit memory location. The destination is not distinct from the first source XMM register and the upper Bits (MAXVL-1:128) of the corresponding ZMM register destination are unmodified.
", + "tooltip": "Adds two, four or eight packed double-precision floating-point values from the first source operand to the second source operand, and stores the packed double-precision floating-point result in the destination operand.", "url": "http://www.felixcloutier.com/x86/ADDPD.html" }; case "ADDPS": case "VADDPS": return { - "html": "Add four, eight or sixteen packed single-precision floating-point values from the first source operand with the second source operand, and stores the packed single-precision floating-point results in the destination operand.
EVEX encoded versions: The first source operand is a ZMM/YMM/XMM register. The second source operand can be a ZMM/YMM/XMM register, a 512/256/128-bit memory location or a 512/256/128-bit vector broadcasted from a 32-bit memory location. The destination operand is a ZMM/YMM/XMM register conditionally updated with writemask k1.
VEX.256 encoded version: The first source operand is a YMM register. The second source operand can be a YMM register or a 256-bit memory location. The destination operand is a YMM register. The upper bits (MAXVL-1:256) of the corresponding ZMM register destination are zeroed.
VEX.128 encoded version: the first source operand is a XMM register. The second source operand is an XMM register or 128-bit memory location. The destination operand is an XMM register. The upper bits (MAXVL-1:128) of the corresponding ZMM register destination are zeroed.
128-bit Legacy SSE version: The second source can be an XMM register or an 128-bit memory location. The destination is not distinct from the first source XMM register and the upper Bits (MAXVL-1:128) of the corresponding ZMM register destination are unmodified.
", - "tooltip": "Add four, eight or sixteen packed single-precision floating-point values from the first source operand with the second source operand, and stores the packed single-precision floating-point results in the destination operand.", + "html": "Adds four, eight or sixteen packed single-precision floating-point values from the first source operand with the second source operand, and stores the packed single-precision floating-point result in the destination operand.
EVEX encoded versions: The first source operand is a ZMM/YMM/XMM register. The second source operand can be a ZMM/YMM/XMM register, a 512/256/128-bit memory location or a 512/256/128-bit vector broadcasted from a 32-bit memory location. The destination operand is a ZMM/YMM/XMM register conditionally updated with writemask k1.
VEX.256 encoded version: The first source operand is a YMM register. The second source operand can be a YMM register or a 256-bit memory location. The destination operand is a YMM register. The upper bits (MAXVL-1:256) of the corresponding ZMM register destination are zeroed.
VEX.128 encoded version: the first source operand is a XMM register. The second source operand is an XMM register or 128-bit memory location. The destination operand is an XMM register. The upper bits (MAXVL-1:128) of the corresponding ZMM register destination are zeroed.
128-bit Legacy SSE version: The second source can be an XMM register or an 128-bit memory location. The destination is not distinct from the first source XMM register and the upper Bits (MAXVL-1:128) of the corresponding ZMM register destination are unmodified.
", + "tooltip": "Adds four, eight or sixteen packed single-precision floating-point values from the first source operand with the second source operand, and stores the packed single-precision floating-point result in the destination operand.", "url": "http://www.felixcloutier.com/x86/ADDPS.html" }; @@ -109,32 +110,32 @@ export function getAsmOpcode(opcode) { case "AESDEC": case "VAESDEC": return { - "html": "This instruction performs a single round of the AES decryption flow using the Equivalent Inverse Cipher, with the round key from the second source operand, operating on a 128-bit data (state) from the first source operand, and store the result in the destination operand.
Use the AESDEC instruction for all but the last decryption round. For the last decryption round, use the AESDECLAST instruction.
128-bit Legacy SSE version: The first source operand and the destination operand are the same and must be an XMM register. The second source operand can be an XMM register or a 128-bit memory location. Bits (MAXVL-1:128) of the corresponding YMM destination register remain unchanged.
VEX.128 encoded version: The first source operand and the destination operand are XMM registers. The second source operand can be an XMM register or a 128-bit memory location. Bits (MAXVL-1:128) of the destination YMM register are zeroed.
", - "tooltip": "This instruction performs a single round of the AES decryption flow using the Equivalent Inverse Cipher, with the round key from the second source operand, operating on a 128-bit data (state) from the first source operand, and store the result in the destination operand.", + "html": "This instruction performs a single round of the AES decryption flow using the Equivalent Inverse Cipher, using one/two/four (depending on vector length) 128-bit data (state) from the first source operand with one/two/four (depending on vector length) round key(s) from the second source operand, and stores the result in the destination operand.
Use the AESDEC instruction for all but the last decryption round. For the last decryption round, use the AESDECLAST instruction.
VEX and EVEX encoded versions of the instruction allow 3-operand (non-destructive) operation. The legacy encoded versions of the instruction require that the first source operand and the destination operand are the same and must be an XMM register.
The EVEX encoded form of this instruction does not support memory fault suppression.
", + "tooltip": "This instruction performs a single round of the AES decryption flow using the Equivalent Inverse Cipher, using one/two/four (depending on vector length) 128-bit data (state) from the first source operand with one/two/four (depending on vector length) round key(s) from the second source operand, and stores the result in the destination operand.", "url": "http://www.felixcloutier.com/x86/AESDEC.html" }; case "AESDECLAST": case "VAESDECLAST": return { - "html": "This instruction performs the last round of the AES decryption flow using the Equivalent Inverse Cipher, with the round key from the second source operand, operating on a 128-bit data (state) from the first source operand, and store the result in the destination operand.
128-bit Legacy SSE version: The first source operand and the destination operand are the same and must be an XMM register. The second source operand can be an XMM register or a 128-bit memory location. Bits (MAXVL-1:128) of the corresponding YMM destination register remain unchanged.
VEX.128 encoded version: The first source operand and the destination operand are XMM registers. The second source operand can be an XMM register or a 128-bit memory location. Bits (MAXVL-1:128) of the destination YMM register are zeroed.
", - "tooltip": "This instruction performs the last round of the AES decryption flow using the Equivalent Inverse Cipher, with the round key from the second source operand, operating on a 128-bit data (state) from the first source operand, and store the result in the destination operand.", + "html": "This instruction performs the last round of the AES decryption flow using the Equivalent Inverse Cipher, using one/two/four (depending on vector length) 128-bit data (state) from the first source operand with one/two/four (depending on vector length) round key(s) from the second source operand, and stores the result in the destination operand.
VEX and EVEX encoded versions of the instruction allow 3-operand (non-destructive) operation. The legacy encoded versions of the instruction require that the first source operand and the destination operand are the same and must be an XMM register.
The EVEX encoded form of this instruction does not support memory fault suppression.
", + "tooltip": "This instruction performs the last round of the AES decryption flow using the Equivalent Inverse Cipher, using one/two/four (depending on vector length) 128-bit data (state) from the first source operand with one/two/four (depending on vector length) round key(s) from the second source operand, and stores the result in the destination operand.", "url": "http://www.felixcloutier.com/x86/AESDECLAST.html" }; case "AESENC": case "VAESENC": return { - "html": "This instruction performs a single round of an AES encryption flow using a round key from the second source operand, operating on 128-bit data (state) from the first source operand, and store the result in the destination operand.
Use the AESENC instruction for all but the last encryption rounds. For the last encryption round, use the AESENCCLAST instruction.
128-bit Legacy SSE version: The first source operand and the destination operand are the same and must be an XMM register. The second source operand can be an XMM register or a 128-bit memory location. Bits (MAXVL-1:128) of the corresponding YMM destination register remain unchanged.
VEX.128 encoded version: The first source operand and the destination operand are XMM registers. The second source operand can be an XMM register or a 128-bit memory location. Bits (MAXVL-1:128) of the destination YMM register are zeroed.
", - "tooltip": "This instruction performs a single round of an AES encryption flow using a round key from the second source operand, operating on 128-bit data (state) from the first source operand, and store the result in the destination operand.", + "html": "This instruction performs a single round of an AES encryption flow using one/two/four (depending on vector length) 128-bit data (state) from the first source operand with one/two/four (depending on vector length) round key(s) from the second source operand, and stores the result in the destination operand.
Use the AESENC instruction for all but the last encryption rounds. For the last encryption round, use the AESENCCLAST instruction.
VEX and EVEX encoded versions of the instruction allow 3-operand (non-destructive) operation. The legacy encoded versions of the instruction require that the first source operand and the destination operand are the same and must be an XMM register.
The EVEX encoded form of this instruction does not support memory fault suppression.
", + "tooltip": "This instruction performs a single round of an AES encryption flow using one/two/four (depending on vector length) 128-bit data (state) from the first source operand with one/two/four (depending on vector length) round key(s) from the second source operand, and stores the result in the destination operand.", "url": "http://www.felixcloutier.com/x86/AESENC.html" }; case "AESENCLAST": case "VAESENCLAST": return { - "html": "This instruction performs the last round of an AES encryption flow using a round key from the second source operand, operating on 128-bit data (state) from the first source operand, and store the result in the destination operand.
128-bit Legacy SSE version: The first source operand and the destination operand are the same and must be an XMM register. The second source operand can be an XMM register or a 128-bit memory location. Bits (MAXVL-1:128) of the corresponding YMM destination register remain unchanged.
VEX.128 encoded version: The first source operand and the destination operand are XMM registers. The second source operand can be an XMM register or a 128-bit memory location. Bits (MAXVL-1:128) of the destination YMM register are zeroed.
", - "tooltip": "This instruction performs the last round of an AES encryption flow using a round key from the second source operand, operating on 128-bit data (state) from the first source operand, and store the result in the destination operand.", + "html": "This instruction performs the last round of an AES encryption flow using one/two/four (depending on vector length) 128-bit data (state) from the first source operand with one/two/four (depending on vector length) round key(s) from the second source operand, and stores the result in the destination operand.
VEX and EVEX encoded versions of the instruction allows 3-operand (non-destructive) operation. The legacy encoded versions of the instruction require that the first source operand and the destination operand are the same and must be an XMM register.
The EVEX encoded form of this instruction does not support memory fault suppression.
", + "tooltip": "This instruction performs the last round of an AES encryption flow using one/two/four (depending on vector length) 128-bit data (state) from the first source operand with one/two/four (depending on vector length) round key(s) from the second source operand, and stores the result in the destination operand.", "url": "http://www.felixcloutier.com/x86/AESENCLAST.html" }; @@ -319,7 +320,7 @@ export function getAsmOpcode(opcode) { case "BSF": return { - "html": "Searches the source operand (second operand) for the least significant set bit (1 bit). If a least significant 1 bit is found, its bit index is stored in the destination operand (first operand). The source operand can be a register or a memory location; the destination operand is a register. The bit index is an unsigned offset from bit 0 of the source operand. If the content of the source operand is 0, the content of the destination operand is undefined.
In 64-bit mode, the instruction\u2019s default operation size is 32 bits. Using a REX prefix in the form of REX.R permits access to additional registers (R8-R15). Using a REX prefix in the form of REX.W promotes operation to 64 bits. See the summary chart at the beginning of this section for encoding data and limits.
", + "html": "Searches the source operand (second operand) for the least significant set bit (1 bit). If a least significant 1 bit is found, its bit index is stored in the destination operand (first operand). The source operand can be a register or a memory location; the destination operand is a register. The bit index is an unsigned offset from bit 0 of the source operand. If the content of the source operand is 0, the content of the destination operand is undefined.
In 64-bit mode, the instruction\u2019s default operation size access to additional registers (R8-R15). Using a REX prefix in the form of REX.W promotes operation to 64 bits. See the summary chart at the beginning of this section for encoding data and limits.
", "tooltip": "Searches the source operand (second operand) for the least significant set bit (1 bit). If a least significant 1 bit is found, its bit index is stored in the destination operand (first operand). The source operand can be a register or a memory location; the destination operand is a register. The bit index is an unsigned offset from bit 0 of the source operand. If the content of the source operand is 0, the content of the destination operand is undefined.", "url": "http://www.felixcloutier.com/x86/BSF.html" }; @@ -419,7 +420,7 @@ export function getAsmOpcode(opcode) { case "CLFLUSH": return { - "html": "Invalidates from every level of the cache hierarchy in the cache coherence domain the cache line that contains the linear address specified with the memory operand. If that cache line contains modified data at any level of the cache hierarchy, that data is written back to memory. The source operand is a byte memory location.
The availability of CLFLUSH is indicated by the presence of the CPUID feature flag CLFSH (CPUID.01H:EDX[bit 19]). The aligned cache line size affected is also indicated with the CPUID instruction (bits 8 through 15 of the EBX register when the initial value in the EAX register is 1).
The memory attribute of the page containing the affected line has no effect on the behavior of this instruction. It should be noted that processors are free to speculatively fetch and cache data from system memory regions assigned a memory-type allowing for speculative reads (such as, the WB, WC, and WT memory types). PREFETCHh instructions can be used to provide the processor with hints for this speculative behavior. Because this speculative fetching can occur at any time and is not tied to instruction execution, the CLFLUSH instruction is not ordered with respect to PREFETCHh instructions or any of the speculative fetching mechanisms (that is, data can be speculatively loaded into a cache line just before, during, or after the execution of a CLFLUSH instruction that references the cache line).
Executions of the CLFLUSH instruction are ordered with respect to each other and with respect to writes, locked read-modify-write instructions, fence instructions, and executions of CLFLUSHOPT to the same cache line.1 They are not ordered with respect to executions of CLFLUSHOPT to different cache lines.
The CLFLUSH instruction can be used at all privilege levels and is subject to all permission checking and faults associated with a byte load (and in addition, a CLFLUSH instruction is allowed to flush a linear address in an execute-only segment). Like a load, the CLFLUSH instruction sets the A bit but not the D bit in the page tables.
", + "html": "Invalidates from every level of the cache hierarchy in the cache coherence domain the cache line that contains the linear address specified with the memory operand. If that cache line contains modified data at any level of the cache hierarchy, that data is written back to memory. The source operand is a byte memory location.
The availability of CLFLUSH is indicated by the presence of the CPUID feature flag CLFSH (CPUID.01H:EDX[bit 19]). The aligned cache line size affected is also indicated with the CPUID instruction (bits 8 through 15 of the EBX register when the initial value in the EAX register is 1).
The memory attribute of the page containing the affected line has no effect on the behavior of this instruction. It should be noted that processors are free to speculatively fetch and cache data from system memory regions assigned a memory-type allowing for speculative reads (such as, the WB, WC, and WT memory types). PREFETCHh instructions can be used to provide the processor with hints for this speculative behavior. Because this speculative fetching can occur at any time and is not tied to instruction execution, the CLFLUSH instruction is not ordered with respect to PREFETCHh instructions or any of the speculative fetching mechanisms (that is, data can be speculatively loaded into a cache line just before, during, or after the execution of a CLFLUSH instruction that references the cache line).
Executions of the CLFLUSH instruction are ordered with respect to each other and with respect to writes, locked read-modify-write instructions, and fence instructions.1 They are not ordered with respect to executions of CLFLUSHOPT and CLWB. Software can use the SFENCE instruction to order an execution of CLFLUSH relative to one of those operations.
The CLFLUSH instruction can be used at all privilege levels and is subject to all permission checking and faults associated with a byte load (and in addition, a CLFLUSH instruction is allowed to flush a linear address in an execute-only segment). Like a load, the CLFLUSH instruction sets the A bit but not the D bit in the page tables.
", "tooltip": "Invalidates from every level of the cache hierarchy in the cache coherence domain the cache line that contains the linear address specified with the memory operand. If that cache line contains modified data at any level of the cache hierarchy, that data is written back to memory. The source operand is a byte memory location.", "url": "http://www.felixcloutier.com/x86/CLFLUSH.html" }; @@ -431,6 +432,13 @@ export function getAsmOpcode(opcode) { "url": "http://www.felixcloutier.com/x86/CLI.html" }; + case "CLRSSBSY": + return { + "html": "Clear busy flag in supervisor shadow stack token reference by m64. Subsequent to marking the shadow stack as not busy the SSP is loaded with value 0.
", + "tooltip": "Clear busy flag in supervisor shadow stack token reference by m64. Subsequent to marking the shadow stack as not busy the SSP is loaded with value 0.", + "url": "http://www.felixcloutier.com/x86/CLRSSBSY.html" + }; + case "CLTS": return { "html": "Clears the task-switched (TS) flag in the CR0 register. This instruction is intended for use in operating-system procedures. It is a privileged instruction that can only be executed at a CPL of 0. It is allowed to be executed in real-address mode to allow initialization for protected mode.
The processor sets the TS flag every time a task switch occurs. The flag is used to synchronize the saving of FPU context in multitasking applications. See the description of the TS flag in the section titled \u201cControl Registers\u201d in Chapter 2 of the Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 3A, for more information about this flag.
CLTS operation is the same in non-64-bit modes and 64-bit mode.
See Chapter 25, \u201cVMX Non-Root Operation,\u201d of the Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 3C, for more information about the behavior of this instruction in VMX non-root operation.
", @@ -440,7 +448,7 @@ export function getAsmOpcode(opcode) { case "CLWB": return { - "html": "Writes back to memory the cache line (if modified) that contains the linear address specified with the memory operand from any level of the cache hierarchy in the cache coherence domain. The line may be retained in the cache hierarchy in non-modified state. Retaining the line in the cache hierarchy is a performance optimization (treated as a hint by hardware) to reduce the possibility of cache miss on a subsequent access. Hardware may choose to retain the line at any of the levels in the cache hierarchy, and in some cases, may invalidate the line from the cache hierarchy. The source operand is a byte memory location.
The availability of CLWB instruction is indicated by the presence of the CPUID feature flag CLWB (bit 24 of the EBX register, see \u201cCPUID \u2014 CPU Identification\u201d in this chapter). The aligned cache line size affected is also indicated with the CPUID instruction (bits 8 through 15 of the EBX register when the initial value in the EAX register is 1).
The memory attribute of the page containing the affected line has no effect on the behavior of this instruction. It should be noted that processors are free to speculatively fetch and cache data from system memory regions that are assigned a memory-type allowing for speculative reads (such as, the WB, WC, and WT memory types). PREFETCHh instructions can be used to provide the processor with hints for this speculative behavior. Because this speculative fetching can occur at any time and is not tied to instruction execution, the CLWB instruction is not ordered with respect to PREFETCHh instructions or any of the speculative fetching mechanisms (that is, data can be speculatively loaded into a cache line just before, during, or after the execution of a CLWB instruction that references the cache line).
CLWB instruction is ordered only by store-fencing operations. For example, software can use an SFENCE, MFENCE, XCHG, or LOCK-prefixed instructions to ensure that previous stores are included in the write-back. CLWB instruction need not be ordered by another CLWB or CLFLUSHOPT instruction. CLWB is implicitly ordered with older stores executed by the logical processor to the same address.
For usages that require only writing back modified data from cache lines to memory (do not require the line to be invalidated), and expect to subsequently access the data, software is recommended to use CLWB (with appropriate fencing) instead of CLFLUSH or CLFLUSHOPT for improved performance.
", + "html": "Writes back to memory the cache line (if modified) that contains the linear address specified with the memory operand from any level of the cache hierarchy in the cache coherence domain. The line may be retained in the cache hierarchy in non-modified state. Retaining the line in the cache hierarchy is a performance optimization (treated as a hint by hardware) to reduce the possibility of cache miss on a subsequent access. Hardware may choose to retain the line at any of the levels in the cache hierarchy, and in some cases, may invalidate the line from the cache hierarchy. The source operand is a byte memory location.
The availability of CLWB instruction is indicated by the presence of the CPUID feature flag CLWB (bit 24 of the EBX register, see \u201cCPUID \u2014 CPU Identification\u201d in this chapter). The aligned cache line size affected is also indicated with the CPUID instruction (bits 8 through 15 of the EBX register when the initial value in the EAX register is 1).
The memory attribute of the page containing the affected line has no effect on the behavior of this instruction. It should be noted that processors are free to speculatively fetch and cache data from system memory regions that are assigned a memory-type allowing for speculative reads (such as, the WB, WC, and WT memory types). PREFETCHh instructions can be used to provide the processor with hints for this speculative behavior. Because this speculative fetching can occur at any time and is not tied to instruction execution, the CLWB instruction is not ordered with respect to PREFETCHh instructions or any of the speculative fetching mechanisms (that is, data can be speculatively loaded into a cache line just before, during, or after the execution of a CLWB instruction that references the cache line).
Executions of the CLWB instruction are ordered with respect to fence instructions and to locked read-modify-write instructions; they are also ordered with respect to older writes to the cache line being written back. They are not ordered with respect to other executions of CLWB, to executions of CLFLUSH and CLFLUSHOPT, or to younger writes to the cache line being written back. Software can use the SFENCE instruction to order an execution of CLWB relative to one of those operations.
For usages that require only writing back modified data from cache lines to memory (do not require the line to be invalidated), and expect to subsequently access the data, software is recommended to use CLWB (with appropriate fencing) instead of CLFLUSH or CLFLUSHOPT for improved performance.
", "tooltip": "Writes back to memory the cache line (if modified) that contains the linear address specified with the memory operand from any level of the cache hierarchy in the cache coherence domain. The line may be retained in the cache hierarchy in non-modified state. Retaining the line in the cache hierarchy is a performance optimization (treated as a hint by hardware) to reduce the possibility of cache miss on a subsequent access. Hardware may choose to retain the line at any of the levels in the cache hierarchy, and in some cases, may invalidate the line from the cache hierarchy. The source operand is a byte memory location.", "url": "http://www.felixcloutier.com/x86/CLWB.html" }; @@ -483,8 +491,8 @@ export function getAsmOpcode(opcode) { case "CMOVS": case "CMOVZ": return { - "html": "The CMOVcc instructions check the state of one or more of the status flags in the EFLAGS register (CF, OF, PF, SF, and ZF) and perform a move operation if the flags are in a specified state (or condition). A condition code (cc) is associated with each instruction to indicate the condition being tested for. If the condition is not satisfied, a move is not performed and execution continues with the instruction following the CMOVcc instruction.
These instructions can move 16-bit, 32-bit or 64-bit values from memory to a general-purpose register or from one general-purpose register to another. Conditional moves of 8-bit register operands are not supported.
The condition for each CMOVcc mnemonic is given in the description column of the above table. The terms \u201cless\u201d and \u201cgreater\u201d are used for comparisons of signed integers and the terms \u201cabove\u201d and \u201cbelow\u201d are used for unsigned integers.
Because a particular state of the status flags can sometimes be interpreted in two ways, two mnemonics are defined for some opcodes. For example, the CMOVA (conditional move if above) instruction and the CMOVNBE (conditional move if not below or equal) instruction are alternate mnemonics for the opcode 0F 47H.
The CMOVcc instructions were introduced in P6 family processors; however, these instructions may not be supported by all IA-32 processors. Software can determine if the CMOVcc instructions are supported by checking the processor\u2019s feature information with the CPUID instruction (see \u201cCPUID\u2014CPU Identification\u201d in this chapter).
", - "tooltip": "The CMOVcc instructions check the state of one or more of the status flags in the EFLAGS register (CF, OF, PF, SF, and ZF) and perform a move operation if the flags are in a specified state (or condition). A condition code (cc) is associated with each instruction to indicate the condition being tested for. If the condition is not satisfied, a move is not performed and execution continues with the instruction following the CMOVcc instruction.", + "html": "Each of the CMOVcc instructions performs a move operation if the status flags in the EFLAGS register (CF, OF, PF, SF, and ZF) are in a specified state (or condition). A condition code (cc) is associated with each instruction to indicate the condition being tested for. If the condition is not satisfied, a move is not performed and execution continues with the instruction following the CMOVcc instruction.
Specifically, CMOVcc loads data from its source operand into a temporary register unconditionally (regardless of the condition code and the status flags in the EFLAGS register). If the condition code associated with the instruction (cc) is satisfied, the data in the temporary register is then copied into the instruction's destination operand.
These instructions can move 16-bit, 32-bit or 64-bit values from memory to a general-purpose register or from one general-purpose register to another. Conditional moves of 8-bit register operands are not supported.
The condition for each CMOVcc mnemonic is given in the description column of the above table. The terms \u201cless\u201d and \u201cgreater\u201d are used for comparisons of signed integers and the terms \u201cabove\u201d and \u201cbelow\u201d are used for unsigned integers.
Because a particular state of the status flags can sometimes be interpreted in two ways, two mnemonics are defined for some opcodes. For example, the CMOVA (conditional move if above) instruction and the CMOVNBE (conditional move if not below or equal) instruction are alternate mnemonics for the opcode 0F 47H.
", + "tooltip": "Each of the CMOVcc instructions performs a move operation if the status flags in the EFLAGS register (CF, OF, PF, SF, and ZF) are in a specified state (or condition). A condition code (cc) is associated with each instruction to indicate the condition being tested for. If the condition is not satisfied, a move is not performed and execution continues with the instruction following the CMOVcc instruction.", "url": "http://www.felixcloutier.com/x86/CMOVcc.html" }; @@ -498,16 +506,16 @@ export function getAsmOpcode(opcode) { case "CMPPD": case "VCMPPD": return { - "html": "Performs a SIMD compare of the packed double-precision floating-point values in the second source operand and the first source operand and returns the results of the comparison to the destination operand. The comparison predicate operand (immediate byte) specifies the type of comparison performed on each pair of packed values in the two source operands.
EVEX encoded versions: The first source operand (second operand) is a ZMM/YMM/XMM register. The second source operand can be a ZMM/YMM/XMM register, a 512/256/128-bit memory location or a 512/256/128-bit vector broadcasted from a 64-bit memory location. The destination operand (first operand) is an opmask register. Comparison results are written to the destination operand under the writemask k2. Each comparison result is a single mask bit of 1 (comparison true) or 0 (comparison false).
VEX.256 encoded version: The first source operand (second operand) is a YMM register. The second source operand (third operand) can be a YMM register or a 256-bit memory location. The destination operand (first operand) is a YMM register. Four comparisons are performed with results written to the destination operand. The result of each comparison is a quadword mask of all 1s (comparison true) or all 0s (comparison false).
128-bit Legacy SSE version: The first source and destination operand (first operand) is an XMM register. The second source operand (second operand) can be an XMM register or 128-bit memory location. Bits (MAXVL-1:128) of the corresponding ZMM destination register remain unchanged. Two comparisons are performed with results written to bits 127:0 of the destination operand. The result of each comparison is a quadword mask of all 1s (comparison true) or all 0s (comparison false).
VEX.128 encoded version: The first source operand (second operand) is an XMM register. The second source operand (third operand) can be an XMM register or a 128-bit memory location. Bits (MAXVL-1:128) of the destination ZMM register are zeroed. Two comparisons are performed with results written to bits 127:0 of the destination operand.
", - "tooltip": "Performs a SIMD compare of the packed double-precision floating-point values in the second source operand and the first source operand and returns the results of the comparison to the destination operand. The comparison predicate operand (immediate byte) specifies the type of comparison performed on each pair of packed values in the two source operands.", + "html": "Performs a SIMD compare of the packed double-precision floating-point values in the second source operand and the first source operand and returns the result of the comparison to the destination operand. The comparison predicate operand (immediate byte) specifies the type of comparison performed on each pair of packed values in the two source operands.
EVEX encoded versions: The first source operand (second operand) is a ZMM/YMM/XMM register. The second source operand can be a ZMM/YMM/XMM register, a 512/256/128-bit memory location or a 512/256/128-bit vector broadcasted from a 64-bit memory location. The destination operand (first operand) is an opmask register. Comparison results are written to the destination operand under the writemask k2. Each comparison result is a single mask bit of 1 (comparison true) or 0 (comparison false).
VEX.256 encoded version: The first source operand (second operand) is a YMM register. The second source operand (third operand) can be a YMM register or a 256-bit memory location. The destination operand (first operand) is a YMM register. Four comparisons are performed with results written to the destination operand. The result of each comparison is a quadword mask of all 1s (comparison true) or all 0s (comparison false).
128-bit Legacy SSE version: The first source and destination operand (first operand) is an XMM register. The second source operand (second operand) can be an XMM register or 128-bit memory location. Bits (MAXVL-1:128) of the corresponding ZMM destination register remain unchanged. Two comparisons are performed with results written to bits 127:0 of the destination operand. The result of each comparison is a quadword mask of all 1s (comparison true) or all 0s (comparison false).
VEX.128 encoded version: The first source operand (second operand) is an XMM register. The second source operand (third operand) can be an XMM register or a 128-bit memory location. Bits (MAXVL-1:128) of the destination ZMM register are zeroed. Two comparisons are performed with results written to bits 127:0 of the destination operand.
", + "tooltip": "Performs a SIMD compare of the packed double-precision floating-point values in the second source operand and the first source operand and returns the result of the comparison to the destination operand. The comparison predicate operand (immediate byte) specifies the type of comparison performed on each pair of packed values in the two source operands.", "url": "http://www.felixcloutier.com/x86/CMPPD.html" }; case "CMPPS": case "VCMPPS": return { - "html": "Performs a SIMD compare of the packed single-precision floating-point values in the second source operand and the first source operand and returns the results of the comparison to the destination operand. The comparison predicate operand (immediate byte) specifies the type of comparison performed on each of the pairs of packed values.
EVEX encoded versions: The first source operand (second operand) is a ZMM/YMM/XMM register. The second source operand can be a ZMM/YMM/XMM register, a 512/256/128-bit memory location or a 512/256/128-bit vector broadcasted from a 32-bit memory location. The destination operand (first operand) is an opmask register. Comparison results are written to the destination operand under the writemask k2. Each comparison result is a single mask bit of 1 (comparison true) or 0 (comparison false).
VEX.256 encoded version: The first source operand (second operand) is a YMM register. The second source operand (third operand) can be a YMM register or a 256-bit memory location. The destination operand (first operand) is a YMM register. Eight comparisons are performed with results written to the destination operand. The result of each comparison is a doubleword mask of all 1s (comparison true) or all 0s (comparison false).
128-bit Legacy SSE version: The first source and destination operand (first operand) is an XMM register. The second source operand (second operand) can be an XMM register or 128-bit memory location. Bits (MAXVL-1:128) of the corresponding ZMM destination register remain unchanged. Four comparisons are performed with results written to bits 127:0 of the destination operand. The result of each comparison is a doubleword mask of all 1s (comparison true) or all 0s (comparison false).
VEX.128 encoded version: The first source operand (second operand) is an XMM register. The second source operand (third operand) can be an XMM register or a 128-bit memory location. Bits (MAXVL-1:128) of the destination ZMM register are zeroed. Four comparisons are performed with results written to bits 127:0 of the destination operand.
", - "tooltip": "Performs a SIMD compare of the packed single-precision floating-point values in the second source operand and the first source operand and returns the results of the comparison to the destination operand. The comparison predicate operand (immediate byte) specifies the type of comparison performed on each of the pairs of packed values.", + "html": "Performs a SIMD compare of the packed single-precision floating-point values in the second source operand and the first source operand and returns the result of the comparison to the destination operand. The comparison predicate operand (immediate byte) specifies the type of comparison performed on each of the pairs of packed values.
EVEX encoded versions: The first source operand (second operand) is a ZMM/YMM/XMM register. The second source operand can be a ZMM/YMM/XMM register, a 512/256/128-bit memory location or a 512/256/128-bit vector broadcasted from a 32-bit memory location. The destination operand (first operand) is an opmask register. Comparison results are written to the destination operand under the writemask k2. Each comparison result is a single mask bit of 1 (comparison true) or 0 (comparison false).
VEX.256 encoded version: The first source operand (second operand) is a YMM register. The second source operand (third operand) can be a YMM register or a 256-bit memory location. The destination operand (first operand) is a YMM register. Eight comparisons are performed with results written to the destination operand. The result of each comparison is a doubleword mask of all 1s (comparison true) or all 0s (comparison false).
128-bit Legacy SSE version: The first source and destination operand (first operand) is an XMM register. The second source operand (second operand) can be an XMM register or 128-bit memory location. Bits (MAXVL-1:128) of the corresponding ZMM destination register remain unchanged. Four comparisons are performed with results written to bits 127:0 of the destination operand. The result of each comparison is a doubleword mask of all 1s (comparison true) or all 0s (comparison false).
VEX.128 encoded version: The first source operand (second operand) is an XMM register. The second source operand (third operand) can be an XMM register or a 128-bit memory location. Bits (MAXVL-1:128) of the destina-
", + "tooltip": "Performs a SIMD compare of the packed single-precision floating-point values in the second source operand and the first source operand and returns the result of the comparison to the destination operand. The comparison predicate operand (immediate byte) specifies the type of comparison performed on each of the pairs of packed values.", "url": "http://www.felixcloutier.com/x86/CMPPS.html" }; @@ -525,8 +533,8 @@ export function getAsmOpcode(opcode) { case "CMPSS": case "VCMPSS": return { - "html": "Compares the low single-precision floating-point values in the second source operand and the first source operand and returns the results of the comparison to the destination operand. The comparison predicate operand (immediate operand) specifies the type of comparison performed.
128-bit Legacy SSE version: The first source and destination operand (first operand) is an XMM register. The second source operand (second operand) can be an XMM register or 32-bit memory location. Bits (MAXVL-1:32) of the corresponding YMM destination register remain unchanged. The comparison result is a doubleword mask of all 1s (comparison true) or all 0s (comparison false).
VEX.128 encoded version: The first source operand (second operand) is an XMM register. The second source operand (third operand) can be an XMM register or a 32-bit memory location. The result is stored in the low 32 bits of the destination operand; bits 128:32 of the destination operand are copied from the first source operand. Bits (MAXVL-1:128) of the destination ZMM register are zeroed. The comparison result is a doubleword mask of all 1s (comparison true) or all 0s (comparison false).
EVEX encoded version: The first source operand (second operand) is an XMM register. The second source operand can be a XMM register or a 32-bit memory location. The destination operand (first operand) is an opmask register. The comparison result is a single mask bit of 1 (comparison true) or 0 (comparison false), written to the destination starting from the LSB according to the writemask k2. Bits (MAX_KL-1:128) of the destination register are cleared.
The comparison predicate operand is an 8-bit immediate:
", - "tooltip": "Compares the low single-precision floating-point values in the second source operand and the first source operand and returns the results of the comparison to the destination operand. The comparison predicate operand (immediate operand) specifies the type of comparison performed.", + "html": "Compares the low single-precision floating-point values in the second source operand and the first source operand and returns the result of the comparison to the destination operand. The comparison predicate operand (immediate operand) specifies the type of comparison performed.
128-bit Legacy SSE version: The first source and destination operand (first operand) is an XMM register. The second source operand (second operand) can be an XMM register or 32-bit memory location. Bits (MAXVL-1:32) of the corresponding YMM destination register remain unchanged. The comparison result is a doubleword mask of all 1s (comparison true) or all 0s (comparison false).
VEX.128 encoded version: The first source operand (second operand) is an XMM register. The second source operand (third operand) can be an XMM register or a 32-bit memory location. The result is stored in the low 32 bits of the destination operand; bits 127:32 of the destination operand are copied from the first source operand. Bits (MAXVL-1:128) of the destination ZMM register are zeroed. The comparison result is a doubleword mask of all 1s (comparison true) or all 0s (comparison false).
EVEX encoded version: The first source operand (second operand) is an XMM register. The second source operand can be a XMM register or a 32-bit memory location. The destination operand (first operand) is an opmask register. The comparison result is a single mask bit of 1 (comparison true) or 0 (comparison false), written to the destination starting from the LSB according to the writemask k2. Bits (MAX_KL-1:128) of the destination register are cleared.
The comparison predicate operand is an 8-bit immediate:
", + "tooltip": "Compares the low single-precision floating-point values in the second source operand and the first source operand and returns the result of the comparison to the destination operand. The comparison predicate operand (immediate operand) specifies the type of comparison performed.", "url": "http://www.felixcloutier.com/x86/CMPSS.html" }; @@ -539,7 +547,7 @@ export function getAsmOpcode(opcode) { case "CMPXCHG8B": return { - "html": "Compares the 64-bit value in EDX:EAX (or 128-bit value in RDX:RAX if operand size is 128 bits) with the operand (destination operand). If the values are equal, the 64-bit value in ECX:EBX (or 128-bit value in RCX:RBX) is stored in the destination operand. Otherwise, the value in the destination operand is loaded into EDX:EAX (or RDX:RAX). The destination operand is an 8-byte memory location (or 16-byte memory location if operand size is 128 bits). For the EDX:EAX and ECX:EBX register pairs, EDX and ECX contain the high-order 32 bits and EAX and EBX contain the low-order 32 bits of a 64-bit value. For the RDX:RAX and RCX:RBX register pairs, RDX and RCX contain the high-order 64 bits and RAX and RBX contain the low-order 64bits of a 128-bit value.
This instruction can be used with a LOCK prefix to allow the instruction to be executed atomically. To simplify the interface to the processor\u2019s bus, the destination operand receives a write cycle without regard to the result of the comparison. The destination operand is written back if the comparison fails; otherwise, the source operand is written into the destination. (The processor never produces a locked read without also producing a locked write.)
In 64-bit mode, default operation size is 64 bits. Use of the REX.W prefix promotes operation to 128 bits. Note that CMPXCHG16B requires that the destination (memory) operand be 16-byte aligned. See the summary chart at the beginning of this section for encoding data and limits. For information on the CPUID flag that indicates CMPXCHG16B, see page 3-213.
This instruction encoding is not supported on Intel processors earlier than the Pentium processors.
", + "html": "Compares the 64-bit value in EDX:EAX (or 128-bit value in RDX:RAX if operand size is 128 bits) with the operand (destination operand). If the values are equal, the 64-bit value in ECX:EBX (or 128-bit value in RCX:RBX) is stored in the destination operand. Otherwise, the value in the destination operand is loaded into EDX:EAX (or RDX:RAX). The destination operand is an 8-byte memory location (or 16-byte memory location if operand size is 128 bits). For the EDX:EAX and ECX:EBX register pairs, EDX and ECX contain the high-order 32 bits and EAX and EBX contain the low-order 32 bits of a 64-bit value. For the RDX:RAX and RCX:RBX register pairs, RDX and RCX contain the high-order 64 bits and RAX and RBX contain the low-order 64bits of a 128-bit value.
This instruction can be used with a LOCK prefix to allow the instruction to be executed atomically. To simplify the interface to the processor\u2019s bus, the destination operand receives a write cycle without regard to the result of the comparison. The destination operand is written back if the comparison fails; otherwise, the source operand is written into the destination. (The processor never produces a locked read without also producing a locked write.)
In 64-bit mode, default operation size is 64 bits. Use of the REX.W prefix promotes operation to 128 bits. Note that CMPXCHG16B requires that the destination (memory) operand be 16-byte aligned. See the summary chart at the beginning of this section for encoding data and limits. For information on the CPUID flag that indicates CMPXCHG16B, see page 3-237.
This instruction encoding is not supported on Intel processors earlier than the Pentium processors.
", "tooltip": "Compares the 64-bit value in EDX:EAX (or 128-bit value in RDX:RAX if operand size is 128 bits) with the operand (destination operand). If the values are equal, the 64-bit value in ECX:EBX (or 128-bit value in RCX:RBX) is stored in the destination operand. Otherwise, the value in the destination operand is loaded into EDX:EAX (or RDX:RAX). The destination operand is an 8-byte memory location (or 16-byte memory location if operand size is 128 bits). For the EDX:EAX and ECX:EBX register pairs, EDX and ECX contain the high-order 32 bits and EAX and EBX contain the low-order 32 bits of a 64-bit value. For the RDX:RAX and RCX:RBX register pairs, RDX and RCX contain the high-order 64 bits and RAX and RBX contain the low-order 64bits of a 128-bit value.", "url": "http://www.felixcloutier.com/x86/CMPXCHG8B%3ACMPXCHG16B.html" }; @@ -547,7 +555,7 @@ export function getAsmOpcode(opcode) { case "COMISD": case "VCOMISD": return { - "html": "Compares the double-precision floating-point values in the low quadwords of operand 1 (first operand) and operand 2 (second operand), and sets the ZF, PF, and CF flags in the EFLAGS register according to the result (unordered, greater than, less than, or equal). The OF, SF and AF flags in the EFLAGS register are set to 0. The unordered result is returned if either source operand is a NaN (QNaN or SNaN).
Operand 1 is an XMM register; operand 2 can be an XMM register or a 64 bit memory
location. The COMISD instruction differs from the UCOMISD instruction in that it signals a SIMD floating-point invalid operation exception (#I) when a source operand is either a QNaN or SNaN. The UCOMISD instruction signals an invalid numeric exception only if a source operand is an SNaN.
The EFLAGS register is not updated if an unmasked SIMD floating-point exception is generated.
VEX.vvvv and EVEX.vvvv are reserved and must be 1111b, otherwise instructions will #UD.
", + "html": "Compares the double-precision floating-point values in the low quadwords of operand 1 (first operand) and operand 2 (second operand), and sets the ZF, PF, and CF flags in the EFLAGS register according to the result (unordered, greater than, less than, or equal). The OF, SF and AF flags in the EFLAGS register are set to 0. The unordered result is returned if either source operand is a NaN (QNaN or SNaN).
Operand 1 is an XMM register; operand 2 can be an XMM register or a 64 bit memory location. The COMISD instruction differs from the UCOMISD instruction in that it signals a SIMD floating-point invalid operation exception (#I) when a source operand is either a QNaN or SNaN. The UCOMISD instruction signals an invalid operation exception only if a source operand is an SNaN.
The EFLAGS register is not updated if an unmasked SIMD floating-point exception is generated.
VEX.vvvv and EVEX.vvvv are reserved and must be 1111b, otherwise instructions will #UD.
Software should ensure VCOMISD is encoded with VEX.L=0. Encoding VCOMISD with VEX.L=1 may encounter unpredictable behavior across different processor generations.
", "tooltip": "Compares the double-precision floating-point values in the low quadwords of operand 1 (first operand) and operand 2 (second operand), and sets the ZF, PF, and CF flags in the EFLAGS register according to the result (unordered, greater than, less than, or equal). The OF, SF and AF flags in the EFLAGS register are set to 0. The unordered result is returned if either source operand is a NaN (QNaN or SNaN).", "url": "http://www.felixcloutier.com/x86/COMISD.html" }; @@ -555,7 +563,7 @@ export function getAsmOpcode(opcode) { case "COMISS": case "VCOMISS": return { - "html": "Compares the single-precision floating-point values in the low quadwords of operand 1 (first operand) and operand 2 (second operand), and sets the ZF, PF, and CF flags in the EFLAGS register according to the result (unordered, greater than, less than, or equal). The OF, SF and AF flags in the EFLAGS register are set to 0. The unordered result is returned if either source operand is a NaN (QNaN or SNaN).
Operand 1 is an XMM register; operand 2 can be an XMM register or a 32 bit memory location.
The COMISS instruction differs from the UCOMISS instruction in that it signals a SIMD floating-point invalid operation exception (#I) when a source operand is either a QNaN or SNaN. The UCOMISS instruction signals an invalid numeric exception only if a source operand is an SNaN.
The EFLAGS register is not updated if an unmasked SIMD floating-point exception is generated.
VEX.vvvv and EVEX.vvvv are reserved and must be 1111b, otherwise instructions will #UD.
", + "html": "Compares the single-precision floating-point values in the low quadwords of operand 1 (first operand) and operand 2 (second operand), and sets the ZF, PF, and CF flags in the EFLAGS register according to the result (unordered, greater than, less than, or equal). The OF, SF and AF flags in the EFLAGS register are set to 0. The unordered result is returned if either source operand is a NaN (QNaN or SNaN).
Operand 1 is an XMM register; operand 2 can be an XMM register or a 32 bit memory location.
The COMISS instruction differs from the UCOMISS instruction in that it signals a SIMD floating-point invalid operation exception (#I) when a source operand is either a QNaN or SNaN. The UCOMISS instruction signals an invalid operation exception only if a source operand is an SNaN.
The EFLAGS register is not updated if an unmasked SIMD floating-point exception is generated.
VEX.vvvv and EVEX.vvvv are reserved and must be 1111b, otherwise instructions will #UD.
", "tooltip": "Compares the single-precision floating-point values in the low quadwords of operand 1 (first operand) and operand 2 (second operand), and sets the ZF, PF, and CF flags in the EFLAGS register according to the result (unordered, greater than, less than, or equal). The OF, SF and AF flags in the EFLAGS register are set to 0. The unordered result is returned if either source operand is a NaN (QNaN or SNaN).", "url": "http://www.felixcloutier.com/x86/COMISS.html" }; @@ -593,7 +601,7 @@ export function getAsmOpcode(opcode) { case "CVTPD2DQ": case "VCVTPD2DQ": return { - "html": "Converts packed double-precision floating-point values in the source operand (second operand) to packed signed doubleword integers in the destination operand (first operand).
When a conversion is inexact, the value returned is rounded according to the rounding control bits in the MXCSR register or the embedded rounding control bits. If a converted result cannot be represented in the destination format, the floating-point invalid exception is raised, and if this exception is masked, the indefinite integer value (2w-1, where w represents the number of bits in the destination format) is returned.
EVEX encoded versions: The source operand is a ZMM/YMM/XMM register, a 512-bit memory location, or a 512-bit vector broadcasted from a 64-bit memory location. The destination operand is a ZMM/YMM/XMM register conditionally updated with writemask k1. The upper bits (MAXVL-1:256/128/64) of the corresponding destination are zeroed.
VEX.256 encoded version: The source operand is a YMM register or 256- bit memory location. The destination operand is an XMM register. The upper bits (MAXVL-1:128) of the corresponding ZMM register destination are zeroed.
VEX.128 encoded version: The source operand is an XMM register or 128- bit memory location. The destination operand is a XMM register. The upper bits (MAXVL-1:64) of the corresponding ZMM register destination are zeroed.
", + "html": "Converts packed double-precision floating-point values in the source operand (second operand) to packed signed doubleword integers in the destination operand (first operand).
When a conversion is inexact, the value returned is rounded according to the rounding control bits in the MXCSR register or the embedded rounding control bits. If a converted result cannot be represented in the destination format, the floating-point invalid exception is raised, and if this exception is masked, the indefinite integer value (2w-1, where w represents the number of bits in the destination format) is returned.
EVEX encoded versions: The source operand is a ZMM/YMM/XMM register, a 512-bit memory location, or a 512-bit vector broadcasted from a 64-bit memory location. The destination operand is a ZMM/YMM/XMM register conditionally updated with writemask k1. The upper bits (MAXVL-1:256/128/64) of the corresponding destination are zeroed.
VEX.256 encoded version: The source operand is a YMM register or 256- bit memory location. The destination operand is an XMM register. The upper bits (MAXVL-1:128) of the corresponding ZMM register destination are zeroed.
VEX.128 encoded version: The source operand is an XMM register or 128- bit memory location. The destination operand is a XMM register. The upper bits (MAXVL-1:64) of the corresponding ZMM register destination are zeroed.
", "tooltip": "Converts packed double-precision floating-point values in the source operand (second operand) to packed signed doubleword integers in the destination operand (first operand).", "url": "http://www.felixcloutier.com/x86/CVTPD2DQ.html" }; @@ -630,7 +638,7 @@ export function getAsmOpcode(opcode) { case "CVTPS2DQ": case "VCVTPS2DQ": return { - "html": "Converts four, eight or sixteen packed single-precision floating-point values in the source operand to four, eight or sixteen signed doubleword integers in the destination operand.
When a conversion is inexact, the value returned is rounded according to the rounding control bits in the MXCSR register or the embedded rounding control bits. If a converted result cannot be represented in the destination format, the floating-point invalid exception is raised, and if this exception is masked, the indefinite integer value (2w-1, where w represents the number of bits in the destination format) is returned.
EVEX encoded versions: The source operand is a ZMM register, a 512-bit memory location or a 512-bit vector broadcasted from a 32-bit memory location. The destination operand is a ZMM register conditionally updated with writemask k1.
VEX.256 encoded version: The source operand is a YMM register or 256- bit memory location. The destination operand is a YMM register. The upper bits (MAXVL-1:256) of the corresponding ZMM register destination are zeroed.
VEX.128 encoded version: The source operand is an XMM register or 128- bit memory location. The destination operand is a XMM register. The upper bits (MAXVL-1:128) of the corresponding ZMM register destination are zeroed.
", + "html": "Converts four, eight or sixteen packed single-precision floating-point values in the source operand to four, eight or sixteen signed doubleword integers in the destination operand.
When a conversion is inexact, the value returned is rounded according to the rounding control bits in the MXCSR register or the embedded rounding control bits. If a converted result cannot be represented in the destination format, the floating-point invalid exception is raised, and if this exception is masked, the indefinite integer value (2w-1, where w represents the number of bits in the destination format) is returned.
EVEX encoded versions: The source operand is a ZMM register, a 512-bit memory location or a 512-bit vector broadcasted from a 32-bit memory location. The destination operand is a ZMM register conditionally updated with writemask k1.
VEX.256 encoded version: The source operand is a YMM register or 256- bit memory location. The destination operand is a YMM register. The upper bits (MAXVL-1:256) of the corresponding ZMM register destination are zeroed.
VEX.128 encoded version: The source operand is an XMM register or 128- bit memory location. The destination operand is a XMM register. The upper bits (MAXVL-1:128) of the corresponding ZMM register destination are zeroed.
", "tooltip": "Converts four, eight or sixteen packed single-precision floating-point values in the source operand to four, eight or sixteen signed doubleword integers in the destination operand.", "url": "http://www.felixcloutier.com/x86/CVTPS2DQ.html" }; @@ -693,8 +701,8 @@ export function getAsmOpcode(opcode) { case "CVTSS2SI": case "VCVTSS2SI": return { - "html": "Converts a single-precision floating-point value in the source operand (the second operand) to a signed doubleword integer (or signed quadword integer if operand size is 64 bits) in the destination operand (the first operand). The source operand can be an XMM register or a memory location. The destination operand is a general-purpose register. When the source operand is an XMM register, the single-precision floating-point value is contained in the low doubleword of the register.
When a conversion is inexact, the value returned is rounded according to the rounding control bits in the MXCSR register or the embedded rounding control bits. If a converted result cannot be represented in the destination format, the floating-point invalid exception is raised, and if this exception is masked, the indefinite integer value (2w-1, where w represents the number of bits in the destination format) is returned.
Legacy SSE instructions: In 64-bit mode, Use of the REX.W prefix promotes the instruction to produce 64-bit data. See the summary chart at the beginning of this section for encoding data and limits.
VEX.W1 and EVEX.W1 versions: promotes the instruction to produce 64-bit data in 64-bit mode.
Note: VEX.vvvv and EVEX.vvvv are reserved and must be 1111b, otherwise instructions will #UD.
", - "tooltip": "Converts a single-precision floating-point value in the source operand (the second operand) to a signed doubleword integer (or signed quadword integer if operand size is 64 bits) in the destination operand (the first operand). The source operand can be an XMM register or a memory location. The destination operand is a general-purpose register. When the source operand is an XMM register, the single-precision floating-point value is contained in the low doubleword of the register.", + "html": "Converts a single-precision floating-point value in the source operand (the second operand) to a signed double-word integer (or signed quadword integer if operand size is 64 bits) in the destination operand (the first operand). The source operand can be an XMM register or a memory location. The destination operand is a general-purpose register. When the source operand is an XMM register, the single-precision floating-point value is contained in the low doubleword of the register.
When a conversion is inexact, the value returned is rounded according to the rounding control bits in the MXCSR register or the embedded rounding control bits. If a converted result cannot be represented in the destination format, the floating-point invalid exception is raised, and if this exception is masked, the indefinite integer value (2w-1, where w represents the number of bits in the destination format) is returned.
Legacy SSE instructions: In 64-bit mode, Use of the REX.W prefix promotes the instruction to produce 64-bit data. See the summary chart at the beginning of this section for encoding data and limits.
VEX.W1 and EVEX.W1 versions: promotes the instruction to produce 64-bit data in 64-bit mode.
Note: VEX.vvvv and EVEX.vvvv are reserved and must be 1111b, otherwise instructions will #UD.
", + "tooltip": "Converts a single-precision floating-point value in the source operand (the second operand) to a signed double-word integer (or signed quadword integer if operand size is 64 bits) in the destination operand (the first operand). The source operand can be an XMM register or a memory location. The destination operand is a general-purpose register. When the source operand is an XMM register, the single-precision floating-point value is contained in the low doubleword of the register.", "url": "http://www.felixcloutier.com/x86/CVTSS2SI.html" }; @@ -739,8 +747,8 @@ export function getAsmOpcode(opcode) { case "CVTTSS2SI": case "VCVTTSS2SI": return { - "html": "Converts a single-precision floating-point value in the source operand (the second operand) to a signed double-word integer (or signed quadword integer if operand size is 64 bits) in the destination operand (the first operand). The source operand can be an XMM register or a 32-bit memory location. The destination operand is a general purpose register. When the source operand is an XMM register, the single-precision floating-point value is contained in the low doubleword of the register.
When a conversion is inexact, a truncated (round toward zero) result is returned. If a converted result is larger than the maximum signed doubleword integer, the floating-point invalid exception is raised. If this exception is masked, the indefinite integer value (80000000H or 80000000_00000000H if operand size is 64 bits) is returned.
Legacy SSE instructions: In 64-bit mode, Use of the REX.W prefix promotes the instruction to 64-bit operation. See the summary chart at the beginning of this section for encoding data and limits.
VEX.W1 and EVEX.W1 versions: promotes the instruction to produce 64-bit data in 64-bit mode.
Note: VEX.vvvv and EVEX.vvvv are reserved and must be 1111b, otherwise instructions will #UD.
", - "tooltip": "Converts a single-precision floating-point value in the source operand (the second operand) to a signed double-word integer (or signed quadword integer if operand size is 64 bits) in the destination operand (the first operand). The source operand can be an XMM register or a 32-bit memory location. The destination operand is a general purpose register. When the source operand is an XMM register, the single-precision floating-point value is contained in the low doubleword of the register.", + "html": "Converts a single-precision floating-point value in the source operand (the second operand) to a signed doubleword integer (or signed quadword integer if operand size is 64 bits) in the destination operand (the first operand). The source operand can be an XMM register or a 32-bit memory location. The destination operand is a general purpose register. When the source operand is an XMM register, the single-precision floating-point value is contained in the low doubleword of the register.
When a conversion is inexact, a truncated (round toward zero) result is returned. If a converted result is larger than the maximum signed doubleword integer, the floating-point invalid exception is raised. If this exception is masked, the indefinite integer value (80000000H or 80000000_00000000H if operand size is 64 bits) is returned.
Legacy SSE instructions: In 64-bit mode, Use of the REX.W prefix promotes the instruction to 64-bit operation. See the summary chart at the beginning of this section for encoding data and limits.
VEX.W1 and EVEX.W1 versions: promotes the instruction to produce 64-bit data in 64-bit mode.
Note: VEX.vvvv and EVEX.vvvv are reserved and must be 1111b, otherwise instructions will #UD.
", + "tooltip": "Converts a single-precision floating-point value in the source operand (the second operand) to a signed doubleword integer (or signed quadword integer if operand size is 64 bits) in the destination operand (the first operand). The source operand can be an XMM register or a 32-bit memory location. The destination operand is a general purpose register. When the source operand is an XMM register, the single-precision floating-point value is contained in the low doubleword of the register.", "url": "http://www.felixcloutier.com/x86/CVTTSS2SI.html" }; @@ -829,6 +837,13 @@ export function getAsmOpcode(opcode) { "url": "http://www.felixcloutier.com/x86/DPPS.html" }; + case "EAX": + return { + "html": "This leaf function copies a page from regular main memory to the EPC. As part of the copying process, the page is cryptographically authenticated and decrypted. This instruction can only be executed when current privilege level is 0.
The ELDB leaf function sets the BLOCK bit in the EPCM entry for the destination page in the EPC after copying. The ELDU leaf function clears the BLOCK bit in the EPCM entry for the destination page in the EPC after copying.
RBX contains the effective address of a PAGEINFO structure; RCX contains the effective address of the destination EPC page; RDX holds the effective address of the version array slot that holds the version of the page.
The ELDBC/ELDUC leafs are very similar to ELDB and ELDU. They provide an error code on the concurrency conflict for any of the pages which need to acquire a lock. These include the destination, SECS, and VA slot.
The table below provides additional information on the memory parameter of ELDB/ELDU leaf functions.
", + "tooltip": "This leaf function copies a page from regular main memory to the EPC. As part of the copying process, the page is cryptographically authenticated and decrypted. This instruction can only be executed when current privilege level is 0.", + "url": "http://www.felixcloutier.com/x86/ELDB%3AELDU%3AELDBC%3AELDUC.html" + }; + case "EMMS": return { "html": "Sets the values of all the tags in the x87 FPU tag word to empty (all 1s). This operation marks the x87 FPU data registers (which are aliased to the MMX technology registers) as available for use by x87 FPU floating-point instructions. (See Figure 8-7 in the Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 1, for the format of the x87 FPU tag word.) All other MMX instructions (other than the EMMS instruction) set all the tags in x87 FPU tag word to valid (all 0s).
The EMMS instruction must be used to clear the MMX technology state at the end of all MMX technology procedures or subroutines and before calling other procedures or subroutines that may execute x87 floating-point instructions. If a floating-point instruction loads one of the registers in the x87 FPU data register stack before the x87 FPU tag word has been reset by the EMMS instruction, an x87 floating-point register stack overflow can occur that will result in an x87 floating-point exception or incorrect result.
EMMS operation is the same in non-64-bit modes and 64-bit mode.
", @@ -843,6 +858,20 @@ export function getAsmOpcode(opcode) { "url": "http://www.felixcloutier.com/x86/ENCLV.html" }; + case "ENDBR32": + return { + "html": "Terminate an indirect branch in 32 bit and compatibility mode.
", + "tooltip": "Terminate an indirect branch in 32 bit and compatibility mode.", + "url": "http://www.felixcloutier.com/x86/ENDBR32.html" + }; + + case "ENDBR64": + return { + "html": "Terminate an indirect branch in 64 bit mode.
", + "tooltip": "Terminate an indirect branch in 64 bit mode.", + "url": "http://www.felixcloutier.com/x86/ENDBR64.html" + }; + case "ENTER": return { "html": "Creates a stack frame (comprising of space for dynamic storage and 1-32 frame pointer storage) for a procedure. The first operand (imm16) specifies the size of the dynamic storage in the stack frame (that is, the number of bytes of dynamically allocated on the stack for the procedure). The second operand (imm8) gives the lexical nesting level (0 to 31) of the procedure. The nesting level (imm8 mod 32) and the OperandSize attribute determine the size in bytes of the storage space for frame pointers.
The nesting level determines the number of frame pointers that are copied into the \u201cdisplay area\u201d of the new stack frame from the preceding frame. The default size of the frame pointer is the StackAddrSize attribute, but can be overridden using the 66H prefix. Thus, the OperandSize attribute determines the size of each frame pointer that will be copied into the stack frame and the data being transferred from SP/ESP/RSP register into the BP/EBP/RBP register.
The ENTER and companion LEAVE instructions are provided to support block structured languages. The ENTER instruction (when used) is typically the first instruction in a procedure and is used to set up a new stack frame for a procedure. The LEAVE instruction is then used at the end of the procedure (just before the RET instruction) to release the stack frame.
If the nesting level is 0, the processor pushes the frame pointer from the BP/EBP/RBP register onto the stack, copies the current stack pointer from the SP/ESP/RSP register into the BP/EBP/RBP register, and loads the SP/ESP/RSP register with the current stack-pointer value minus the value in the size operand. For nesting levels of 1 or greater, the processor pushes additional frame pointers on the stack before adjusting the stack pointer. These additional frame pointers provide the called procedure with access points to other nested frames on the stack. See \u201cProcedure Calls for Block-Structured Languages\u201d in Chapter 6 of the Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 1, for more information about the actions of the ENTER instruction.
The ENTER instruction causes a page fault whenever a write using the final value of the stack pointer (within the current stack segment) would do so.
", @@ -858,20 +887,6 @@ export function getAsmOpcode(opcode) { "url": "http://www.felixcloutier.com/x86/EXTRACTPS.html" }; - case "F2XM1": - return { - "html": "Computes the exponential value of 2 to the power of the source operand minus 1. The source operand is located in register ST(0) and the result is also stored in ST(0). The value of the source operand must lie in the range \u20131.0 to +1.0. If the source value is outside this range, the result is undefined.
The following table shows the results obtained when computing the exponential value of various classes of numbers, assuming that neither overflow nor underflow occurs.
Values other than 2 can be exponentiated using the following formula:
", - "tooltip": "Computes the exponential value of 2 to the power of the source operand minus 1. The source operand is located in register ST(0) and the result is also stored in ST(0). The value of the source operand must lie in the range \u20131.0 to +1.0. If the source value is outside this range, the result is undefined.", - "url": "http://www.felixcloutier.com/x86/F2XM1.html" - }; - - case "FABS": - return { - "html": "Clears the sign bit of ST(0) to create the absolute value of the operand. The following table shows the results obtained when creating the absolute value of various classes of numbers.
This instruction\u2019s operation is the same in non-64-bit modes and 64-bit mode.
", - "tooltip": "Clears the sign bit of ST(0) to create the absolute value of the operand. The following table shows the results obtained when creating the absolute value of various classes of numbers.", - "url": "http://www.felixcloutier.com/x86/FABS.html" - }; - case "FADD": case "FADDP": case "FIADD": @@ -888,24 +903,10 @@ export function getAsmOpcode(opcode) { "url": "http://www.felixcloutier.com/x86/FBLD.html" }; - case "FBSTP": - return { - "html": "Converts the value in the ST(0) register to an 18-digit packed BCD integer, stores the result in the destination operand, and pops the register stack. If the source value is a non-integral value, it is rounded to an integer value, according to rounding mode specified by the RC field of the FPU control word. To pop the register stack, the processor marks the ST(0) register as empty and increments the stack pointer (TOP) by 1.
The destination operand specifies the address where the first byte destination value is to be stored. The BCD value (including its sign bit) requires 10 bytes of space in memory.
The following table shows the results obtained when storing various classes of numbers in packed BCD format.
If the converted value is too large for the destination format, or if the source operand is an \u221e, SNaN, QNAN, or is in an unsupported format, an invalid-arithmetic-operand condition is signaled. If the invalid-operation exception is not masked, an invalid-arithmetic-operand exception (#IA) is generated and no value is stored in the destination operand. If the invalid-operation exception is masked, the packed BCD indefinite value is stored in memory.
This instruction\u2019s operation is the same in non-64-bit modes and 64-bit mode.
", - "tooltip": "Converts the value in the ST(0) register to an 18-digit packed BCD integer, stores the result in the destination operand, and pops the register stack. If the source value is a non-integral value, it is rounded to an integer value, according to rounding mode specified by the RC field of the FPU control word. To pop the register stack, the processor marks the ST(0) register as empty and increments the stack pointer (TOP) by 1.", - "url": "http://www.felixcloutier.com/x86/FBSTP.html" - }; - - case "FCHS": - return { - "html": "Complements the sign bit of ST(0). This operation changes a positive value into a negative value of equal magnitude or vice versa. The following table shows the results obtained when changing the sign of various classes of numbers.
This instruction\u2019s operation is the same in non-64-bit modes and 64-bit mode.
", - "tooltip": "Complements the sign bit of ST(0). This operation changes a positive value into a negative value of equal magnitude or vice versa. The following table shows the results obtained when changing the sign of various classes of numbers.", - "url": "http://www.felixcloutier.com/x86/FCHS.html" - }; - case "FCLEX": case "FNCLEX": return { - "html": "Clears the floating-point exception flags (PE, UE, OE, ZE, DE, and IE), the exception summary status flag (ES), the stack fault flag (SF), and the busy flag (B) in the FPU status word. The FCLEX instruction checks for and handles any pending unmasked floating-point exceptions before clearing the exception flags; the FNCLEX instruction does not.
The assembler issues two instructions for the FCLEX instruction (an FWAIT instruction followed by an FNCLEX instruction), and the processor executes each of these instructions separately. If an exception is generated for either of these instructions, the save EIP points to the instruction that caused the exception.
When operating a Pentium or Intel486 processor in MS-DOS* compatibility mode, it is possible (under unusual circumstances) for an FNCLEX instruction to be interrupted prior to being executed to handle a pending FPU exception. See the section titled \u201cNo-Wait FPU Instructions Can Get FPU Interrupt in Window\u201d in Appendix D of the Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 1, for a description of these circumstances. An FNCLEX instruction cannot be interrupted in this way on later Intel processors, except for the Intel QuarkTM X1000 processor.
This instruction affects only the x87 FPU floating-point exception flags. It does not affect the SIMD floating-point exception flags in the MXCRS register.
This instruction\u2019s operation is the same in non-64-bit modes and 64-bit mode.
", + "html": "Clears the floating-point exception flags (PE, UE, OE, ZE, DE, and IE), the exception summary status flag (ES), the stack fault flag (SF), and the busy flag (B) in the FPU status word. The FCLEX instruction checks for and handles any pending unmasked floating-point exceptions before clearing the exception flags; the FNCLEX instruction does not.
The assembler issues two instructions for the FCLEX instruction (an FWAIT instruction followed by an FNCLEX instruction), and the processor executes each of these instructions separately. If an exception is generated for either of these instructions, the save EIP points to the instruction that caused the exception.
When operating a Pentium or Intel486 processor in MS-DOS* compatibility mode, it is possible (under unusual circumstances) for an FNCLEX instruction to be interrupted prior to being executed to handle a pending FPU exception. See the section titled \u201cNo-Wait FPU Instructions Can Get FPU Interrupt in Window\u201d in Appendix D of the Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 1, for a description of these circumstances. An FNCLEX instruction cannot be interrupted in this way on later Intel processors, except for the Intel QuarkTM X1000 processor.
This instruction affects only the x87 FPU floating-point exception flags. It does not affect the SIMD floating-point exception flags in the MXCSR register.
This instruction\u2019s operation is the same in non-64-bit modes and 64-bit mode.
", "tooltip": "Clears the floating-point exception flags (PE, UE, OE, ZE, DE, and IE), the exception summary status flag (ES), the stack fault flag (SF), and the busy flag (B) in the FPU status word. The FCLEX instruction checks for and handles any pending unmasked floating-point exceptions before clearing the exception flags; the FNCLEX instruction does not.", "url": "http://www.felixcloutier.com/x86/FCLEX%3AFNCLEX.html" }; @@ -943,20 +944,6 @@ export function getAsmOpcode(opcode) { "url": "http://www.felixcloutier.com/x86/FCOMI%3AFCOMIP%3AFUCOMI%3AFUCOMIP.html" }; - case "FCOS": - return { - "html": "Computes the approximate cosine of the source operand in register ST(0) and stores the result in ST(0). The source operand must be given in radians and must be within the range \u2212263 to +263. The following table shows the results obtained when taking the cosine of various classes of numbers.
If the source operand is outside the acceptable range, the C2 flag in the FPU status word is set, and the value in register ST(0) remains unchanged. The instruction does not raise an exception when the source operand is out of range. It is up to the program to check the C2 flag for out-of-range conditions. Source values outside the range \u2212 263 to +263 can be reduced to the range of the instruction by subtracting an appropriate integer multiple of 2\u03c0. However, even within the range -263 to +263, inaccurate results can occur because the finite approximation of \u03c0 used internally for argument reduction is not sufficient in all cases. Therefore, for accurate results it is safe to apply FCOS only to arguments reduced accurately in software, to a value smaller in absolute value than 3\u03c0/8. See the sections titled \u201cApproximation of Pi\u201d and \u201cTranscendental Instruction Accuracy\u201d in Chapter 8 of the Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 1, for a discussion of the proper value to use for \u03c0 in performing such reductions.
This instruction\u2019s operation is the same in non-64-bit modes and 64-bit mode.
", - "tooltip": "Computes the approximate cosine of the source operand in register ST(0) and stores the result in ST(0). The source operand must be given in radians and must be within the range \u2212263 to +263. The following table shows the results obtained when taking the cosine of various classes of numbers.", - "url": "http://www.felixcloutier.com/x86/FCOS.html" - }; - - case "FDECSTP": - return { - "html": "Subtracts one from the TOP field of the FPU status word (decrements the top-of-stack pointer). If the TOP field contains a 0, it is set to 7. The effect of this instruction is to rotate the stack by one position. The contents of the FPU data registers and tag register are not affected.
This instruction\u2019s operation is the same in non-64-bit modes and 64-bit mode.
", - "tooltip": "Subtracts one from the TOP field of the FPU status word (decrements the top-of-stack pointer). If the TOP field contains a 0, it is set to 7. The effect of this instruction is to rotate the stack by one position. The contents of the FPU data registers and tag register are not affected.", - "url": "http://www.felixcloutier.com/x86/FDECSTP.html" - }; - case "FDIV": case "FDIVP": case "FIDIV": @@ -975,17 +962,10 @@ export function getAsmOpcode(opcode) { "url": "http://www.felixcloutier.com/x86/FDIVR%3AFDIVRP%3AFIDIVR.html" }; - case "FFREE": - return { - "html": "Sets the tag in the FPU tag register associated with register ST(i) to empty (11B). The contents of ST(i) and the FPU stack-top pointer (TOP) are not affected.
This instruction\u2019s operation is the same in non-64-bit modes and 64-bit mode.
", - "tooltip": "Sets the tag in the FPU tag register associated with register ST(i) to empty (11B). The contents of ST(i) and the FPU stack-top pointer (TOP) are not affected.", - "url": "http://www.felixcloutier.com/x86/FFREE.html" - }; - case "FICOM": case "FICOMP": return { - "html": "Compares the value in ST(0) with an integer source operand and sets the condition code flags C0, C2, and C3 in the FPU status word according to the results (see table below). The integer value is converted to double extended-precision floating-point format before the comparison is made.
These instructions perform an \u201cunordered comparison.\u201d An unordered comparison also checks the class of the numbers being compared (see \u201cFXAM\u2014Examine Floating-Point\u201d in this chapter). If either operand is a NaN or is in an undefined format, the condition flags are set to \u201cunordered.\u201d
The sign of zero is ignored, so that \u20130.0 \u2190 +0.0.
The FICOMP instructions pop the register stack following the comparison. To pop the register stack, the processor marks the ST(0) register empty and increments the stack pointer (TOP) by 1.
This instruction\u2019s operation is the same in non-64-bit modes and 64-bit mode.
", + "html": "Compares the value in ST(0) with an integer source operand and sets the condition code flags C0, C2, and C3 in the FPU status word according to the results (see table below). The integer value is converted to double extended-precision floating-point format before the comparison is made.
These instructions perform an \u201cunordered comparison.\u201d An unordered comparison also checks the class of the numbers being compared (see \u201cFXAM\u2014Examine Floating-Point\u201d in this chapter). If either operand is a NaN or is in an undefined format, the condition flags are set to \u201cunordered.\u201d
The sign of zero is ignored, so that \u20130.0 := +0.0.
The FICOMP instructions pop the register stack following the comparison. To pop the register stack, the processor marks the ST(0) register empty and increments the stack pointer (TOP) by 1.
This instruction\u2019s operation is the same in non-64-bit modes and 64-bit mode.
", "tooltip": "Compares the value in ST(0) with an integer source operand and sets the condition code flags C0, C2, and C3 in the FPU status word according to the results (see table below). The integer value is converted to double extended-precision floating-point format before the comparison is made.", "url": "http://www.felixcloutier.com/x86/FICOM%3AFICOMP.html" }; @@ -997,13 +977,6 @@ export function getAsmOpcode(opcode) { "url": "http://www.felixcloutier.com/x86/FILD.html" }; - case "FINCSTP": - return { - "html": "Adds one to the TOP field of the FPU status word (increments the top-of-stack pointer). If the TOP field contains a 7, it is set to 0. The effect of this instruction is to rotate the stack by one position. The contents of the FPU data registers and tag register are not affected. This operation is not equivalent to popping the stack, because the tag for the previous top-of-stack register is not marked empty.
This instruction\u2019s operation is the same in non-64-bit modes and 64-bit mode.
", - "tooltip": "Adds one to the TOP field of the FPU status word (increments the top-of-stack pointer). If the TOP field contains a 7, it is set to 0. The effect of this instruction is to rotate the stack by one position. The contents of the FPU data registers and tag register are not affected. This operation is not equivalent to popping the stack, because the tag for the previous top-of-stack register is not marked empty.", - "url": "http://www.felixcloutier.com/x86/FINCSTP.html" - }; - case "FINIT": case "FNINIT": return { @@ -1047,20 +1020,6 @@ export function getAsmOpcode(opcode) { "url": "http://www.felixcloutier.com/x86/FLD1%3AFLDL2T%3AFLDL2E%3AFLDPI%3AFLDLG2%3AFLDLN2%3AFLDZ.html" }; - case "FLDCW": - return { - "html": "Loads the 16-bit source operand into the FPU control word. The source operand is a memory location. This instruction is typically used to establish or change the FPU\u2019s mode of operation.
If one or more exception flags are set in the FPU status word prior to loading a new FPU control word and the new control word unmasks one or more of those exceptions, a floating-point exception will be generated upon execution of the next floating-point instruction (except for the no-wait floating-point instructions, see the section titled \u201cSoftware Exception Handling\u201d in Chapter 8 of the Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 1). To avoid raising exceptions when changing FPU operating modes, clear any pending exceptions (using the FCLEX or FNCLEX instruction) before loading the new control word.
This instruction\u2019s operation is the same in non-64-bit modes and 64-bit mode.
", - "tooltip": "Loads the 16-bit source operand into the FPU control word. The source operand is a memory location. This instruction is typically used to establish or change the FPU\u2019s mode of operation.", - "url": "http://www.felixcloutier.com/x86/FLDCW.html" - }; - - case "FLDENV": - return { - "html": "Loads the complete x87 FPU operating environment from memory into the FPU registers. The source operand specifies the first byte of the operating-environment data in memory. This data is typically written to the specified memory location by a FSTENV or FNSTENV instruction.
The FPU operating environment consists of the FPU control word, status word, tag word, instruction pointer, data pointer, and last opcode. Figures 8-9 through 8-12 in the Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 1, show the layout in memory of the loaded environment, depending on the operating mode of the processor (protected or real) and the current operand-size attribute (16-bit or 32-bit). In virtual-8086 mode, the real mode layouts are used.
The FLDENV instruction should be executed in the same operating mode as the corresponding FSTENV/FNSTENV instruction.
If one or more unmasked exception flags are set in the new FPU status word, a floating-point exception will be generated upon execution of the next floating-point instruction (except for the no-wait floating-point instructions, see the section titled \u201cSoftware Exception Handling\u201d in Chapter 8 of the Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 1). To avoid generating exceptions when loading a new environment, clear all the exception flags in the FPU status word that is being loaded.
If a page or limit fault occurs during the execution of this instruction, the state of the x87 FPU registers as seen by the fault handler may be different than the state being loaded from memory. In such situations, the fault handler should ignore the status of the x87 FPU registers, handle the fault, and return. The FLDENV instruction will then complete the loading of the x87 FPU registers with no resulting context inconsistency.
", - "tooltip": "Loads the complete x87 FPU operating environment from memory into the FPU registers. The source operand specifies the first byte of the operating-environment data in memory. This data is typically written to the specified memory location by a FSTENV or FNSTENV instruction.", - "url": "http://www.felixcloutier.com/x86/FLDENV.html" - }; - case "FIMUL": case "FMUL": case "FMULP": @@ -1070,30 +1029,16 @@ export function getAsmOpcode(opcode) { "url": "http://www.felixcloutier.com/x86/FMUL%3AFMULP%3AFIMUL.html" }; - case "FNOP": - return { - "html": "Performs no FPU operation. This instruction takes up space in the instruction stream but does not affect the FPU or machine context, except the EIP register and the FPU Instruction Pointer.
This instruction\u2019s operation is the same in non-64-bit modes and 64-bit mode.
", - "tooltip": "Performs no FPU operation. This instruction takes up space in the instruction stream but does not affect the FPU or machine context, except the EIP register and the FPU Instruction Pointer.", - "url": "http://www.felixcloutier.com/x86/FNOP.html" - }; - - case "FPATAN": - return { - "html": "Computes the arctangent of the source operand in register ST(1) divided by the source operand in register ST(0), stores the result in ST(1), and pops the FPU register stack. The result in register ST(0) has the same sign as the source operand ST(1) and a magnitude less than +\u03c0.
The FPATAN instruction returns the angle between the X axis and the line from the origin to the point (X,Y), where Y (the ordinate) is ST(1) and X (the abscissa) is ST(0). The angle depends on the sign of X and Y independently, not just on the sign of the ratio Y/X. This is because a point (\u2212X,Y) is in the second quadrant, resulting in an angle between \u03c0/2 and \u03c0, while a point (X,\u2212Y) is in the fourth quadrant, resulting in an angle between 0 and \u2212\u03c0/2. A point (\u2212X,\u2212Y) is in the third quadrant, giving an angle between \u2212\u03c0/2 and \u2212\u03c0.
The following table shows the results obtained when computing the arctangent of various classes of numbers, assuming that underflow does not occur.
There is no restriction on the range of source operands that FPATAN can accept.
This instruction\u2019s operation is the same in non-64-bit modes and 64-bit mode.
", - "tooltip": "Computes the arctangent of the source operand in register ST(1) divided by the source operand in register ST(0), stores the result in ST(1), and pops the FPU register stack. The result in register ST(0) has the same sign as the source operand ST(1) and a magnitude less than +\u03c0.", - "url": "http://www.felixcloutier.com/x86/FPATAN.html" - }; - case "FPREM": return { - "html": "Computes the remainder obtained from dividing the value in the ST(0) register (the dividend) by the value in the ST(1) register (the divisor or modulus), and stores the result in ST(0). The remainder represents the following value:
Remainder \u2190 ST(0) \u2212 (Q \u2217 ST(1))
Here, Q is an integer value that is obtained by truncating the floating-point number quotient of [ST(0) / ST(1)] toward zero. The sign of the remainder is the same as the sign of the dividend. The magnitude of the remainder is less than that of the modulus, unless a partial remainder was computed (as described below).
This instruction produces an exact result; the inexact-result exception does not occur and the rounding control has no effect. The following table shows the results obtained when computing the remainder of various classes of numbers, assuming that underflow does not occur.
When the result is 0, its sign is the same as that of the dividend. When the modulus is \u221e, the result is equal to the value in ST(0).
", + "html": "Computes the remainder obtained from dividing the value in the ST(0) register (the dividend) by the value in the ST(1) register (the divisor or modulus), and stores the result in ST(0). The remainder represents the following value:
Remainder := ST(0) \u2212 (Q \u2217 ST(1))
Here, Q is an integer value that is obtained by truncating the floating-point number quotient of [ST(0) / ST(1)] toward zero. The sign of the remainder is the same as the sign of the dividend. The magnitude of the remainder is less than that of the modulus, unless a partial remainder was computed (as described below).
This instruction produces an exact result; the inexact-result exception does not occur and the rounding control has no effect. The following table shows the results obtained when computing the remainder of various classes of numbers, assuming that underflow does not occur.
When the result is 0, its sign is the same as that of the dividend. When the modulus is \u221e, the result is equal to the value in ST(0).
", "tooltip": "Computes the remainder obtained from dividing the value in the ST(0) register (the dividend) by the value in the ST(1) register (the divisor or modulus), and stores the result in ST(0). The remainder represents the following value", "url": "http://www.felixcloutier.com/x86/FPREM.html" }; case "FPREM1": return { - "html": "Computes the IEEE remainder obtained from dividing the value in the ST(0) register (the dividend) by the value in the ST(1) register (the divisor or modulus), and stores the result in ST(0). The remainder represents the following value:
Remainder \u2190 ST(0) \u2212 (Q \u2217 ST(1))
Here, Q is an integer value that is obtained by rounding the floating-point number quotient of [ST(0) / ST(1)] toward the nearest integer value. The magnitude of the remainder is less than or equal to half the magnitude of the modulus, unless a partial remainder was computed (as described below).
This instruction produces an exact result; the precision (inexact) exception does not occur and the rounding control has no effect. The following table shows the results obtained when computing the remainder of various classes of numbers, assuming that underflow does not occur.
When the result is 0, its sign is the same as that of the dividend. When the modulus is \u221e, the result is equal to the value in ST(0).
", + "html": "Computes the IEEE remainder obtained from dividing the value in the ST(0) register (the dividend) by the value in the ST(1) register (the divisor or modulus), and stores the result in ST(0). The remainder represents the following value:
Remainder := ST(0) \u2212 (Q \u2217 ST(1))
Here, Q is an integer value that is obtained by rounding the floating-point number quotient of [ST(0) / ST(1)] toward the nearest integer value. The magnitude of the remainder is less than or equal to half the magnitude of the modulus, unless a partial remainder was computed (as described below).
This instruction produces an exact result; the precision (inexact) exception does not occur and the rounding control has no effect. The following table shows the results obtained when computing the remainder of various classes of numbers, assuming that underflow does not occur.
When the result is 0, its sign is the same as that of the dividend. When the modulus is \u221e, the result is equal to the value in ST(0).
", "tooltip": "Computes the IEEE remainder obtained from dividing the value in the ST(0) register (the dividend) by the value in the ST(1) register (the divisor or modulus), and stores the result in ST(0). The remainder represents the following value", "url": "http://www.felixcloutier.com/x86/FPREM1.html" }; @@ -1105,20 +1050,6 @@ export function getAsmOpcode(opcode) { "url": "http://www.felixcloutier.com/x86/FPTAN.html" }; - case "FRNDINT": - return { - "html": "Rounds the source value in the ST(0) register to the nearest integral value, depending on the current rounding mode (setting of the RC field of the FPU control word), and stores the result in ST(0).
If the source value is \u221e, the value is not changed. If the source value is not an integral value, the floating-point inexact-result exception (#P) is generated.
This instruction\u2019s operation is the same in non-64-bit modes and 64-bit mode.
", - "tooltip": "Rounds the source value in the ST(0) register to the nearest integral value, depending on the current rounding mode (setting of the RC field of the FPU control word), and stores the result in ST(0).", - "url": "http://www.felixcloutier.com/x86/FRNDINT.html" - }; - - case "FRSTOR": - return { - "html": "Loads the FPU state (operating environment and register stack) from the memory area specified with the source operand. This state data is typically written to the specified memory location by a previous FSAVE/FNSAVE instruction.
The FPU operating environment consists of the FPU control word, status word, tag word, instruction pointer, data pointer, and last opcode. Figures 8-9 through 8-12 in the Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 1, show the layout in memory of the stored environment, depending on the operating mode of the processor (protected or real) and the current operand-size attribute (16-bit or 32-bit). In virtual-8086 mode, the real mode layouts are used. The contents of the FPU register stack are stored in the 80 bytes immediately following the operating environment image.
The FRSTOR instruction should be executed in the same operating mode as the corresponding FSAVE/FNSAVE instruction.
If one or more unmasked exception bits are set in the new FPU status word, a floating-point exception will be generated. To avoid raising exceptions when loading a new operating environment, clear all the exception flags in the FPU status word that is being loaded.
This instruction\u2019s operation is the same in non-64-bit modes and 64-bit mode.
", - "tooltip": "Loads the FPU state (operating environment and register stack) from the memory area specified with the source operand. This state data is typically written to the specified memory location by a previous FSAVE/FNSAVE instruction.", - "url": "http://www.felixcloutier.com/x86/FRSTOR.html" - }; - case "FNSAVE": case "FSAVE": return { @@ -1127,20 +1058,6 @@ export function getAsmOpcode(opcode) { "url": "http://www.felixcloutier.com/x86/FSAVE%3AFNSAVE.html" }; - case "FSCALE": - return { - "html": "Truncates the value in the source operand (toward 0) to an integral value and adds that value to the exponent of the destination operand. The destination and source operands are floating-point values located in registers ST(0) and ST(1), respectively. This instruction provides rapid multiplication or division by integral powers of 2. The following table shows the results obtained when scaling various classes of numbers, assuming that neither overflow nor underflow occurs.
In most cases, only the exponent is changed and the mantissa (significand) remains unchanged. However, when the value being scaled in ST(0) is a denormal value, the mantissa is also changed and the result may turn out to be a normalized number. Similarly, if overflow or underflow results from a scale operation, the resulting mantissa will differ from the source\u2019s mantissa.
The FSCALE instruction can also be used to reverse the action of the FXTRACT instruction, as shown in the following example:
", - "tooltip": "Truncates the value in the source operand (toward 0) to an integral value and adds that value to the exponent of the destination operand. The destination and source operands are floating-point values located in registers ST(0) and ST(1), respectively. This instruction provides rapid multiplication or division by integral powers of 2. The following table shows the results obtained when scaling various classes of numbers, assuming that neither overflow nor underflow occurs.", - "url": "http://www.felixcloutier.com/x86/FSCALE.html" - }; - - case "FSIN": - return { - "html": "Computes an approximation of the sine of the source operand in register ST(0) and stores the result in ST(0). The source operand must be given in radians and must be within the range \u2212263 to +263. The following table shows the results obtained when taking the sine of various classes of numbers, assuming that underflow does not occur.
If the source operand is outside the acceptable range, the C2 flag in the FPU status word is set, and the value in register ST(0) remains unchanged. The instruction does not raise an exception when the source operand is out of range. It is up to the program to check the C2 flag for out-of-range conditions. Source values outside the range \u2212 263 to +263 can be reduced to the range of the instruction by subtracting an appropriate integer multiple of 2\u03c0. However, even within the range -263 to +263, inaccurate results can occur because the finite approximation of \u03c0 used internally for argument reduction is not sufficient in all cases. Therefore, for accurate results it is safe to apply FSIN only to arguments reduced accurately in software, to a value smaller in absolute value than 3\u03c0/4. See the sections titled \u201cApproximation of Pi\u201d and \u201cTranscendental Instruction Accuracy\u201d in Chapter 8 of the Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 1, for a discussion of the proper value to use for \u03c0 in performing such reductions.
This instruction\u2019s operation is the same in non-64-bit modes and 64-bit mode.
", - "tooltip": "Computes an approximation of the sine of the source operand in register ST(0) and stores the result in ST(0). The source operand must be given in radians and must be within the range \u2212263 to +263. The following table shows the results obtained when taking the sine of various classes of numbers, assuming that underflow does not occur.", - "url": "http://www.felixcloutier.com/x86/FSIN.html" - }; - case "FSINCOS": return { "html": "Computes both the approximate sine and the cosine of the source operand in register ST(0), stores the sine in ST(0), and pushes the cosine onto the top of the FPU register stack. (This instruction is faster than executing the FSIN and FCOS instructions in succession.)
The source operand must be given in radians and must be within the range \u2212263 to +263. The following table shows the results obtained when taking the sine and cosine of various classes of numbers, assuming that underflow does not occur.
If the source operand is outside the acceptable range, the C2 flag in the FPU status word is set, and the value in register ST(0) remains unchanged. The instruction does not raise an exception when the source operand is out of range. It is up to the program to check the C2 flag for out-of-range conditions. Source values outside the range \u2212 263 to +263 can be reduced to the range of the instruction by subtracting an appropriate integer multiple of 2\u03c0. However, even within the range -263 to +263, inaccurate results can occur because the finite approximation of \u03c0 used internally for argument reduction is not sufficient in all cases. Therefore, for accurate results it is safe to apply FSINCOS only to arguments reduced accurately in software, to a value smaller in absolute value than 3\u03c0/8. See the sections titled \u201cApproximation of Pi\u201d and \u201cTranscendental Instruction Accuracy\u201d in Chapter 8 of the Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 1, for a discussion of the proper value to use for \u03c0 in performing such reductions.
This instruction\u2019s operation is the same in non-64-bit modes and 64-bit mode.
", @@ -1148,13 +1065,6 @@ export function getAsmOpcode(opcode) { "url": "http://www.felixcloutier.com/x86/FSINCOS.html" }; - case "FSQRT": - return { - "html": "Computes the square root of the source value in the ST(0) register and stores the result in ST(0).
The following table shows the results obtained when taking the square root of various classes of numbers, assuming that neither overflow nor underflow occurs.
This instruction\u2019s operation is the same in non-64-bit modes and 64-bit mode.
", - "tooltip": "Computes the square root of the source value in the ST(0) register and stores the result in ST(0).", - "url": "http://www.felixcloutier.com/x86/FSQRT.html" - }; - case "FST": case "FSTP": return { @@ -1205,13 +1115,6 @@ export function getAsmOpcode(opcode) { "url": "http://www.felixcloutier.com/x86/FSUBR%3AFSUBRP%3AFISUBR.html" }; - case "FTST": - return { - "html": "Compares the value in the ST(0) register with 0.0 and sets the condition code flags C0, C2, and C3 in the FPU status word according to the results (see table below).
This instruction performs an \u201cunordered comparison.\u201d An unordered comparison also checks the class of the numbers being compared (see \u201cFXAM\u2014Examine Floating-Point\u201d in this chapter). If the value in register ST(0) is a NaN or is in an undefined format, the condition flags are set to \u201cunordered\u201d and the invalid operation exception is generated.
The sign of zero is ignored, so that (\u2013 0.0 \u2190 +0.0).
This instruction\u2019s operation is the same in non-64-bit modes and 64-bit mode.
", - "tooltip": "Compares the value in the ST(0) register with 0.0 and sets the condition code flags C0, C2, and C3 in the FPU status word according to the results (see table below).", - "url": "http://www.felixcloutier.com/x86/FTST.html" - }; - case "FUCOM": case "FUCOMP": case "FUCOMPP": @@ -1221,13 +1124,6 @@ export function getAsmOpcode(opcode) { "url": "http://www.felixcloutier.com/x86/FUCOM%3AFUCOMP%3AFUCOMPP.html" }; - case "FXAM": - return { - "html": "Examines the contents of the ST(0) register and sets the condition code flags C0, C2, and C3 in the FPU status word to indicate the class of value or number in the register (see the table below).
The C1 flag is set to the sign of the value in ST(0), regardless of whether the register is empty or full.
This instruction\u2019s operation is the same in non-64-bit modes and 64-bit mode.
", - "tooltip": "Examines the contents of the ST(0) register and sets the condition code flags C0, C2, and C3 in the FPU status word to indicate the class of value or number in the register (see the table below).", - "url": "http://www.felixcloutier.com/x86/FXAM.html" - }; - case "FXCH": return { "html": "Exchanges the contents of registers ST(0) and ST(i). If no source operand is specified, the contents of ST(0) and ST(1) are exchanged.
This instruction provides a simple means of moving values in the FPU register stack to the top of the stack [ST(0)], so that they can be operated on by those floating-point instructions that can only operate on values in ST(0). For example, the following instruction sequence takes the square root of the third register from the top of the register stack:
", @@ -1251,25 +1147,46 @@ export function getAsmOpcode(opcode) { case "FXTRACT": return { - "html": "Separates the source value in the ST(0) register into its exponent and significand, stores the exponent in ST(0), and pushes the significand onto the register stack. Following this operation, the new top-of-stack register ST(0) contains the value of the original significand expressed as a floating-point value. The sign and significand of this value are the same as those found in the source operand, and the exponent is 3FFFH (biased value for a true exponent of zero). The ST(1) register contains the value of the original operand\u2019s true (unbiased) exponent expressed as a floating-point value. (The operation performed by this instruction is a superset of the IEEE-recommended logb(x) function.)
This instruction and the F2XM1 instruction are useful for performing power and range scaling operations. The FXTRACT instruction is also useful for converting numbers in double extended-precision floating-point format to decimal representations (e.g., for printing or displaying).
If the floating-point zero-divide exception (#Z) is masked and the source operand is zero, an exponent value of \u2013\u221e is stored in register ST(1) and 0 with the sign of the source operand is stored in register ST(0).
This instruction\u2019s operation is the same in non-64-bit modes and 64-bit mode.
", + "html": "Separates the source value in the ST(0) register into its exponent and significand, stores the exponent in ST(0), and pushes the significand onto the register stack. Following this operation, the new top-of-stack register ST(0) contains the value of the original significand expressed as a floating-point value. The sign and significand of this value are the same as those found in the source operand, and the exponent is 3FFFH (biased value for a true exponent of zero). The ST(1) register contains the value of the original operand\u2019s true (unbiased) exponent expressed as a floating-point value. (The operation performed by this instruction is a superset of the IEEE-recommended logb(x) function.)
This instruction and the F2XM1 instruction are useful for performing power and range scaling operations. The FXTRACT instruction is also useful for converting numbers in double extended-precision floating-point format to decimal representations (e.g., for printing or displaying).
If the floating-point zero-divide exception (#Z) is masked and the source operand is zero, an exponent value of \u2013 \u221e is stored in register ST(1) and 0 with the sign of the source operand is stored in register ST(0).
This instruction\u2019s operation is the same in non-64-bit modes and 64-bit mode.
", "tooltip": "Separates the source value in the ST(0) register into its exponent and significand, stores the exponent in ST(0), and pushes the significand onto the register stack. Following this operation, the new top-of-stack register ST(0) contains the value of the original significand expressed as a floating-point value. The sign and significand of this value are the same as those found in the source operand, and the exponent is 3FFFH (biased value for a true exponent of zero). The ST(1) register contains the value of the original operand\u2019s true (unbiased) exponent expressed as a floating-point value. (The operation performed by this instruction is a superset of the IEEE-recommended logb(x) function.)", "url": "http://www.felixcloutier.com/x86/FXTRACT.html" }; case "FYL2X": return { - "html": "Computes (ST(1) \u2217 log2 (ST(0))), stores the result in register ST(1), and pops the FPU register stack. The source operand in ST(0) must be a non-zero positive number.
The following table shows the results obtained when taking the log of various classes of numbers, assuming that neither overflow nor underflow occurs.
If the divide-by-zero exception is masked and register ST(0) contains \u00b10, the instruction returns \u221e with a sign that is the opposite of the sign of the source operand in register ST(1).
The FYL2X instruction is designed with a built-in multiplication to optimize the calculation of logarithms with an arbitrary positive base (b):
logbx \u2190 (log2b)\u20131 \u2217 log2x
", + "html": "Computes (ST(1) \u2217 log2 (ST(0))), stores the result in register ST(1), and pops the FPU register stack. The source operand in ST(0) must be a non-zero positive number.
The following table shows the results obtained when taking the log of various classes of numbers, assuming that neither overflow nor underflow occurs.
If the divide-by-zero exception is masked and register ST(0) contains \u00b10, the instruction returns \u221e with a sign that is the opposite of the sign of the source operand in register ST(1).
The FYL2X instruction is designed with a built-in multiplication to optimize the calculation of logarithms with an arbitrary positive base (b):
logbx := (log2b)\u20131 \u2217 log2x
", "tooltip": "Computes (ST(1) \u2217 log2 (ST(0))), stores the result in register ST(1), and pops the FPU register stack. The source operand in ST(0) must be a non-zero positive number.", "url": "http://www.felixcloutier.com/x86/FYL2X.html" }; case "FYL2XP1": return { - "html": "Computes (ST(1) \u2217 log2(ST(0) + 1.0)), stores the result in register ST(1), and pops the FPU register stack. The source operand in ST(0) must be in the range:
The source operand in ST(1) can range from \u2212\u221e to +\u221e. If the ST(0) operand is outside of its acceptable range, the result is undefined and software should not rely on an exception being generated. Under some circumstances exceptions may be generated when ST(0) is out of range, but this behavior is implementation specific and not guaranteed.
The following table shows the results obtained when taking the log epsilon of various classes of numbers, assuming that underflow does not occur.
This instruction provides optimal accuracy for values of epsilon [the value in register ST(0)] that are close to 0. For small epsilon (\u03b5) values, more significant digits can be retained by using the FYL2XP1 instruction than by using (\u03b5+1) as an argument to the FYL2X instruction. The (\u03b5+1) expression is commonly found in compound interest and annuity calculations. The result can be simply converted into a value in another logarithm base by including a scale factor in the ST(1) source operand. The following equation is used to calculate the scale factor for a particular logarithm base, where n is the logarithm base desired for the result of the FYL2XP1 instruction:
scale factor \u2190 logn 2
", + "html": "Computes (ST(1) \u2217 log2(ST(0) + 1.0)), stores the result in register ST(1), and pops the FPU register stack. The source operand in ST(0) must be in the range:
The source operand in ST(1) can range from \u2212\u221e to +\u221e. If the ST(0) operand is outside of its acceptable range, the result is undefined and software should not rely on an exception being generated. Under some circumstances exceptions may be generated when ST(0) is out of range, but this behavior is implementation specific and not guaranteed.
The following table shows the results obtained when taking the log epsilon of various classes of numbers, assuming that underflow does not occur.
This instruction provides optimal accuracy for values of epsilon [the value in register ST(0)] that are close to 0. For small epsilon (\u03b5) values, more significant digits can be retained by using the FYL2XP1 instruction than by using (\u03b5+1) as an argument to the FYL2X instruction. The (\u03b5+1) expression is commonly found in compound interest and annuity calculations. The result can be simply converted into a value in another logarithm base by including a scale factor in the ST(1) source operand. The following equation is used to calculate the scale factor for a particular logarithm base, where n is the logarithm base desired for the result of the FYL2XP1 instruction:
scale factor := logn 2
", "tooltip": "Computes (ST(1) \u2217 log2(ST(0) + 1.0)), stores the result in register ST(1), and pops the FPU register stack. The source operand in ST(0) must be in the range", "url": "http://www.felixcloutier.com/x86/FYL2XP1.html" }; + case "VGF2P8AFFINEINVQB": + return { + "html": "The AFFINEINVB instruction computes an affine transformation in the Galois Field 28. For this instruction, an affine transformation is defined by A * inv(x) + b where \u201cA\u201d is an 8 by 8 bit matrix, and \u201cx\u201d and \u201cb\u201d are 8-bit vectors. The inverse of the bytes in x is defined with respect to the reduction polynomial x8 + x4 + x3 + x + 1.
One SIMD register (operand 1) holds \u201cx\u201d as either 16, 32 or 64 8-bit vectors. A second SIMD (operand 2) register or memory operand contains 2, 4, or 8 \u201cA\u201d values, which are operated upon by the correspondingly aligned 8 \u201cx\u201d values in the first register. The \u201cb\u201d vector is constant for all calculations and contained in the immediate byte.
The EVEX encoded form of this instruction does not support memory fault suppression. The SSE encoded forms of the instruction require 16B alignment on their memory operations.
The inverse of each byte is given by the following table. The upper nibble is on the vertical axis and the lower nibble is on the horizontal axis. For example, the inverse of 0x95 is 0x8A.
", + "tooltip": "The AFFINEINVB instruction computes an affine transformation in the Galois Field 28. For this instruction, an affine transformation is defined by A * inv(x) + b where \u201cA\u201d is an 8 by 8 bit matrix, and \u201cx\u201d and \u201cb\u201d are 8-bit vectors. The inverse of the bytes in x is defined with respect to the reduction polynomial x8 + x4 + x3 + x + 1.", + "url": "http://www.felixcloutier.com/x86/GF2P8AFFINEINVQB.html" + }; + + case "VGF2P8AFFINEQB": + return { + "html": "The AFFINEB instruction computes an affine transformation in the Galois Field 28. For this instruction, an affine transformation is defined by A * x + b where \u201cA\u201d is an 8 by 8 bit matrix, and \u201cx\u201d and \u201cb\u201d are 8-bit vectors. One SIMD register (operand 1) holds \u201cx\u201d as either 16, 32 or 64 8-bit vectors. A second SIMD (operand 2) register or memory operand contains 2, 4, or 8 \u201cA\u201d values, which are operated upon by the correspondingly aligned 8 \u201cx\u201d values in the first register. The \u201cb\u201d vector is constant for all calculations and contained in the immediate byte.
The EVEX encoded form of this instruction does not support memory fault suppression. The SSE encoded forms of the instruction require16B alignment on their memory operations.
", + "tooltip": "The AFFINEB instruction computes an affine transformation in the Galois Field 28. For this instruction, an affine transformation is defined by A * x + b where \u201cA\u201d is an 8 by 8 bit matrix, and \u201cx\u201d and \u201cb\u201d are 8-bit vectors. One SIMD register (operand 1) holds \u201cx\u201d as either 16, 32 or 64 8-bit vectors. A second SIMD (operand 2) register or memory operand contains 2, 4, or 8 \u201cA\u201d values, which are operated upon by the correspondingly aligned 8 \u201cx\u201d values in the first register. The \u201cb\u201d vector is constant for all calculations and contained in the immediate byte.", + "url": "http://www.felixcloutier.com/x86/GF2P8AFFINEQB.html" + }; + + case "VGF2P8MULB": + return { + "html": "The instruction multiplies elements in the finite field GF(28), operating on a byte (field element) in the first source operand and the corresponding byte in a second source operand. The field GF(28) is represented in polynomial representation with the reduction polynomial x8 + x4 + x3 + x + 1.
This instruction does not support broadcasting.
The EVEX encoded form of this instruction supports memory fault suppression. The SSE encoded forms of the instruction require16B alignment on their memory operations.
", + "tooltip": "The instruction multiplies elements in the finite field GF(28), operating on a byte (field element) in the first source operand and the corresponding byte in a second source operand. The field GF(28) is represented in polynomial representation with the reduction polynomial x8 + x4 + x3 + x + 1.", + "url": "http://www.felixcloutier.com/x86/GF2P8MULB.html" + }; + case "HADDPD": case "VHADDPD": return { @@ -1293,6 +1210,13 @@ export function getAsmOpcode(opcode) { "url": "http://www.felixcloutier.com/x86/HLT.html" }; + case "HRESET": + return { + "html": "Requests the processor to selectively reset selected components of hardware history maintained by the current logical processor. HRESET operation is controlled by the implicit EAX operand. The value of the explicit imm8 operand is ignored. This instruction can only be executed at privilege level 0.
The HRESET instruction can be used to request reset of multiple components of hardware history. Prior to the execution of HRESET, the system software must take the following steps:
1. Enumerate the HRESET capabilities via CPUID.20H.0H:EBX, which indicates what components of hardware history can be reset.
2. Only the bits enumerated by CPUID.20H.0H:EBX can be set in the IA32_HRESET_ENABLE MSR.
HRESET causes a general-protection exception (#GP) if EAX sets any bits that are not set in the IA32_HRESET_ENABLE MSR.
", + "tooltip": "Requests the processor to selectively reset selected components of hardware history maintained by the current logical processor. HRESET operation is controlled by the implicit EAX operand. The value of the explicit imm8 operand is ignored. This instruction can only be executed at privilege level 0.", + "url": "http://www.felixcloutier.com/x86/HRESET.html" + }; + case "HSUBPD": case "VHSUBPD": return { @@ -1325,7 +1249,7 @@ export function getAsmOpcode(opcode) { case "IN": return { - "html": "Copies the value from the I/O port specified with the second operand (source operand) to the destination operand (first operand). The source operand can be a byte-immediate or the DX register; the destination operand can be register AL, AX, or EAX, depending on the size of the port being accessed (8, 16, or 32 bits, respectively). Using the DX register as a source operand allows I/O port addresses from 0 to 65,535 to be accessed; using a byte immediate allows I/O port addresses 0 to 255 to be accessed.
When accessing an 8-bit I/O port, the opcode determines the port size; when accessing a 16- and 32-bit I/O port, the operand-size attribute determines the port size. At the machine code level, I/O instructions are shorter when accessing 8-bit I/O ports. Here, the upper eight bits of the port address will be 0.
This instruction is only useful for accessing I/O ports located in the processor\u2019s I/O address space. See Chapter 18, \u201cInput/Output,\u201d in the Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 1, for more information on accessing I/O ports in the I/O address space.
This instruction\u2019s operation is the same in non-64-bit modes and 64-bit mode.
", + "html": "Copies the value from the I/O port specified with the second operand (source operand) to the destination operand (first operand). The source operand can be a byte-immediate or the DX register; the destination operand can be register AL, AX, or EAX, depending on the size of the port being accessed (8, 16, or 32 bits, respectively). Using the DX register as a source operand allows I/O port addresses from 0 to 65,535 to be accessed; using a byte immediate allows I/O port addresses 0 to 255 to be accessed.
When accessing an 8-bit I/O port, the opcode determines the port size; when accessing a 16- and 32-bit I/O port, the operand-size attribute determines the port size. At the machine code level, I/O instructions are shorter when accessing 8-bit I/O ports. Here, the upper eight bits of the port address will be 0.
This instruction is only useful for accessing I/O ports located in the processor\u2019s I/O address space. See Chapter 19, \u201cInput/Output,\u201d in the Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 1, for more information on accessing I/O ports in the I/O address space.
This instruction\u2019s operation is the same in non-64-bit modes and 64-bit mode.
", "tooltip": "Copies the value from the I/O port specified with the second operand (source operand) to the destination operand (first operand). The source operand can be a byte-immediate or the DX register; the destination operand can be register AL, AX, or EAX, depending on the size of the port being accessed (8, 16, or 32 bits, respectively). Using the DX register as a source operand allows I/O port addresses from 0 to 65,535 to be accessed; using a byte immediate allows I/O port addresses 0 to 255 to be accessed.", "url": "http://www.felixcloutier.com/x86/IN.html" }; @@ -1337,6 +1261,14 @@ export function getAsmOpcode(opcode) { "url": "http://www.felixcloutier.com/x86/INC.html" }; + case "INCSSPD": + case "INCSSPQ": + return { + "html": "This instruction can be used to increment the current shadow stack pointer by the operand size of the instruction times the unsigned 8-bit value specified by bits 7:0 in the source operand. The instruction performs a pop and discard of the first and last element on the shadow stack in the range specified by the unsigned 8-bit value in bits 7:0 of the source operand.
", + "tooltip": "This instruction can be used to increment the current shadow stack pointer by the operand size of the instruction times the unsigned 8-bit value specified by bits 7:0 in the source operand. The instruction performs a pop and discard of the first and last element on the shadow stack in the range specified by the unsigned 8-bit value in bits 7:0 of the source operand.", + "url": "http://www.felixcloutier.com/x86/INCSSPD%3AINCSSPQ.html" + }; + case "INS": case "INSB": case "INSD": @@ -1374,7 +1306,7 @@ export function getAsmOpcode(opcode) { case "INVPCID": return { - "html": "Invalidates mappings in the translation lookaside buffers (TLBs) and paging-structure caches based on process-context identifier (PCID). (See Section 4.10, \u201cCaching Translation Information,\u201d in Intel 64 and IA-32 Architecture Software Developer\u2019s Manual, Volume 3A.) Invalidation is based on the INVPCID type specified in the register operand and the INVPCID descriptor specified in the memory operand.
Outside 64-bit mode, the register operand is always 32 bits, regardless of the value of CS.D. In 64-bit mode the register operand has 64 bits.
There are four INVPCID types currently defined:
The INVPCID descriptor comprises 128 bits and consists of a PCID and a linear address as shown in Figure 3-24. For INVPCID type 0, the processor uses the full 64 bits of the linear address even outside 64-bit mode; the linear address is not used for other INVPCID types.
If CR4.PCIDE = 0, a logical processor does not cache information for any PCID other than 000H. In this case, executions with INVPCID types 0 and 1 are allowed only if the PCID specified in the INVPCID descriptor is 000H; executions with INVPCID types 2 and 3 invalidate mappings only for PCID 000H. Note that CR4.PCIDE must be 0 outside 64-bit mode (see Chapter 4.10.1, \u201cProcess-Context Identifiers (PCIDs)\u201a\u201d of the Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 3A).
", + "html": "Invalidates mappings in the translation lookaside buffers (TLBs) and paging-structure caches based on process-context identifier (PCID). (See Section 4.10, \u201cCaching Translation Information,\u201d in Intel 64 and IA-32 Architecture Software Developer\u2019s Manual, Volume 3A.) Invalidation is based on the INVPCID type specified in the register operand and the INVPCID descriptor specified in the memory operand.
Outside 64-bit mode, the register operand is always 32 bits, regardless of the value of CS.D. In 64-bit mode the register operand has 64 bits.
There are four INVPCID types currently defined:
The INVPCID descriptor comprises 128 bits and consists of a PCID and a linear address as shown in Figure 3-24. For INVPCID type 0, the processor uses the full 64 bits of the linear address even outside 64-bit mode; the linear address is not used for other INVPCID types.
If CR4.PCIDE = 0, a logical processor does not cache information for any PCID other than 000H. In this case, executions with INVPCID types 0 and 1 are allowed only if the PCID specified in the INVPCID descriptor is 000H; executions with INVPCID types 2 and 3 invalidate mappings only for PCID 000H. Note that CR4.PCIDE must be 0 outside IA-32e mode (see Chapter 4.10.1, \u201cProcess-Context Identifiers (PCIDs)\u201a\u201d of the Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 3A).
", "tooltip": "Invalidates mappings in the translation lookaside buffers (TLBs) and paging-structure caches based on process-context identifier (PCID). (See Section 4.10, \u201cCaching Translation Information,\u201d in Intel 64 and IA-32 Architecture Software Developer\u2019s Manual, Volume 3A.) Invalidation is based on the INVPCID type specified in the register operand and the INVPCID descriptor specified in the memory operand.", "url": "http://www.felixcloutier.com/x86/INVPCID.html" }; @@ -1383,9 +1315,9 @@ export function getAsmOpcode(opcode) { case "IRETD": case "IRETQ": return { - "html": "Returns program control from an exception or interrupt handler to a program or procedure that was interrupted by an exception, an external interrupt, or a software-generated interrupt. These instructions are also used to perform a return from a nested task. (A nested task is created when a CALL instruction is used to initiate a task switch or when an interrupt or exception causes a task switch to an interrupt or exception handler.) See the section titled \u201cTask Linking\u201d in Chapter 7 of the Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 3A.
IRET and IRETD are mnemonics for the same opcode. The IRETD mnemonic (interrupt return double) is intended for use when returning from an interrupt when using the 32-bit operand size; however, most assemblers use the IRET mnemonic interchangeably for both operand sizes.
In Real-Address Mode, the IRET instruction preforms a far return to the interrupted program or procedure. During this operation, the processor pops the return instruction pointer, return code segment selector, and EFLAGS image from the stack to the EIP, CS, and EFLAGS registers, respectively, and then resumes execution of the interrupted program or procedure.
In Protected Mode, the action of the IRET instruction depends on the settings of the NT (nested task) and VM flags in the EFLAGS register and the VM flag in the EFLAGS image stored on the current stack. Depending on the setting of these flags, the processor performs the following types of interrupt returns:
If the NT flag (EFLAGS register) is cleared, the IRET instruction performs a far return from the interrupt procedure, without a task switch. The code segment being returned to must be equally or less privileged than the interrupt handler routine (as indicated by the RPL field of the code segment selector popped from the stack).
", + "html": "Returns program control from an exception or interrupt handler to a program or procedure that was interrupted by an exception, an external interrupt, or a software-generated interrupt. These instructions are also used to perform a return from a nested task. (A nested task is created when a CALL instruction is used to initiate a task switch or when an interrupt or exception causes a task switch to an interrupt or exception handler.) See the section titled \u201cTask Linking\u201d in Chapter 7 of the Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 3A.
IRET and IRETD are mnemonics for the same opcode. The IRETD mnemonic (interrupt return double) is intended for use when returning from an interrupt when using the 32-bit operand size; however, most assemblers use the IRET mnemonic interchangeably for both operand sizes.
In Real-Address Mode, the IRET instruction performs a far return to the interrupted program or procedure. During this operation, the processor pops the return instruction pointer, return code segment selector, and EFLAGS image from the stack to the EIP, CS, and EFLAGS registers, respectively, and then resumes execution of the interrupted program or procedure.
In Protected Mode, the action of the IRET instruction depends on the settings of the NT (nested task) and VM flags in the EFLAGS register and the VM flag in the EFLAGS image stored on the current stack. Depending on the setting of these flags, the processor performs the following types of interrupt returns:
If the NT flag (EFLAGS register) is cleared, the IRET instruction performs a far return from the interrupt procedure, without a task switch. The code segment being returned to must be equally or less privileged than the interrupt handler routine (as indicated by the RPL field of the code segment selector popped from the stack).
", "tooltip": "Returns program control from an exception or interrupt handler to a program or procedure that was interrupted by an exception, an external interrupt, or a software-generated interrupt. These instructions are also used to perform a return from a nested task. (A nested task is created when a CALL instruction is used to initiate a task switch or when an interrupt or exception causes a task switch to an interrupt or exception handler.) See the section titled \u201cTask Linking\u201d in Chapter 7 of the Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 3A.", - "url": "http://www.felixcloutier.com/x86/IRET%3AIRETD.html" + "url": "http://www.felixcloutier.com/x86/IRET%3AIRETD%3AIRETQ.html" }; case "JMP": @@ -1817,14 +1749,14 @@ export function getAsmOpcode(opcode) { case "MOVDIR64B": return { - "html": "Moves 64-bytes as direct-store with 64-byte write atomicity from source memory address to destination memory address. The source operand is a normal memory operand. The destination operand is a memory location specified in a general-purpose register. The register content is interpreted as an offset into ES segment without any segment override. In 64-bit mode, the register operand width is 64-bits (32-bits with 67H prefix). Outside of 64-bit mode, the register width is 32-bits when CS.D=1 (16-bits with 67H prefix), and 16-bits when CS.D=0 (32-bits with 67H prefix). MOVDIR64B requires the destination address to be 64-byte aligned. No alignment restriction is enforced for source operand.
MOVDIR64B reads 64-bytes from the source memory address and performs a 64-byte direct-store operation to the destination address. The load operation follows normal read ordering based on source address memory-type. The direct-store is implemented by using the write combining (WC) memory type protocol for writing data. Using this protocol, the processor does not write the data into the cache hierarchy, nor does it fetch the corresponding cache line from memory into the cache hierarchy. If the destination address is cached, the line is written-back (if modified) and invalidated from the cache, before the direct-store.
Unlike stores with non-temporal hint which allow UC/WP memory-type for destination to override the non-temporal hint, direct-stores always follow WC memory type protocol irrespective of destination address memory type (including UC/WP types). Unlike WC stores and stores with non-temporal hint, direct-stores are eligible for immediate eviction from the write-combining buffer, and thus not combined with younger stores (including direct-stores) to the same address. Older WC and non-temporal stores held in the write-combing buffer may be combined with younger direct stores to the same address. Because WC protocol used by direct-stores follow weakly-ordered memory consistency model, fencing operation using SFENCE or MFENCE should follow the MOVDIR64B instruction to enforce ordering when needed.
There is no atomicity guarantee provided for the 64-byte load operation from source address, and processor implementations may use multiple load operations to read the 64-bytes. The 64-byte direct-store issued by MOVDIR64B guarantees 64-byte write-completion atomicity. This means that the data arrives at the destination in a single undivided 64-byte write transaction.
Availability of the MOVDIR64B instruction is indicated by the presence of the CPUID feature flag MOVDIR64B (bit 28 of the ECX register in leaf 07H, see \u201cCPUID\u2014CPU Identification\u201d in the Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 2A).
", + "html": "Moves 64-bytes as direct-store with 64-byte write atomicity from source memory address to destination memory address. The source operand is a normal memory operand. The destination operand is a memory location specified in a general-purpose register. The register content is interpreted as an offset into ES segment without any segment override. In 64-bit mode, the register operand width is 64-bits (32-bits with 67H prefix). Outside of 64-bit mode, the register width is 32-bits when CS.D=1 (16-bits with 67H prefix), and 16-bits when CS.D=0 (32-bits with 67H prefix). MOVDIR64B requires the destination address to be 64-byte aligned. No alignment restriction is enforced for source operand.
MOVDIR64B first reads 64-bytes from the source memory address. It then performs a 64-byte direct-store operation to the destination address. The load operation follows normal read ordering based on source address memory-type. The direct-store is implemented by using the write combining (WC) memory type protocol for writing data. Using this protocol, the processor does not write the data into the cache hierarchy, nor does it fetch the corresponding cache line from memory into the cache hierarchy. If the destination address is cached, the line is written-back (if modified) and invalidated from the cache, before the direct-store.
Unlike stores with non-temporal hint which allow UC/WP memory-type for destination to override the non-temporal hint, direct-stores always follow WC memory type protocol irrespective of destination address memory type (including UC/WP types). Unlike WC stores and stores with non-temporal hint, direct-stores are eligible for immediate eviction from the write-combining buffer, and thus not combined with younger stores (including direct-stores) to the same address. Older WC and non-temporal stores held in the write-combing buffer may be combined with younger direct stores to the same address. Direct stores are weakly ordered relative to other stores. Software that desires stronger ordering should use a fencing instruction (MFENCE or SFENCE) before or after a direct store to enforce the ordering desired.
There is no atomicity guarantee provided for the 64-byte load operation from source address, and processor implementations may use multiple load operations to read the 64-bytes. The 64-byte direct-store issued by MOVDIR64B guarantees 64-byte write-completion atomicity. This means that the data arrives at the destination in a single undivided 64-byte write transaction.
Availability of the MOVDIR64B instruction is indicated by the presence of the CPUID feature flag MOVDIR64B (bit 28 of the ECX register in leaf 07H, see \u201cCPUID\u2014CPU Identification\u201d in the Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 2A).
", "tooltip": "Moves 64-bytes as direct-store with 64-byte write atomicity from source memory address to destination memory address. The source operand is a normal memory operand. The destination operand is a memory location specified in a general-purpose register. The register content is interpreted as an offset into ES segment without any segment override. In 64-bit mode, the register operand width is 64-bits (32-bits with 67H prefix). Outside of 64-bit mode, the register width is 32-bits when CS.D=1 (16-bits with 67H prefix), and 16-bits when CS.D=0 (32-bits with 67H prefix). MOVDIR64B requires the destination address to be 64-byte aligned. No alignment restriction is enforced for source operand.", "url": "http://www.felixcloutier.com/x86/MOVDIR64B.html" }; case "MOVDIRI": return { - "html": "Moves the doubleword integer in the source operand (second operand) to the destination operand (first operand) using a direct-store operation. The source operand is a general purpose register. The destination operand is a 32-bit memory location. In 64-bit mode, the instruction\u2019s default operation size is 32 bits. Use of the REX.R prefix permits access to additional registers (R8-R15). Use of the REX.W prefix promotes operation to 64 bits. See summary chart at the beginning of this section for encoding data and limits.
The direct-store is implemented by using write combining (WC) memory type protocol for writing data. Using this protocol, the processor does not write the data into the cache hierarchy, nor does it fetch the corresponding cache line from memory into the cache hierarchy. If the destination address is cached, the line is written-back (if modified) and invalidated from the cache, before the direct-store. Unlike stores with non-temporal hint that allow uncached (UC) and write-protected (WP) memory-type for the destination to override the non-temporal hint, direct-stores always follow WC memory type protocol irrespective of the destination address memory type (including UC and WP types).
Unlike WC stores and stores with non-temporal hint, direct-stores are eligible for immediate eviction from the write-combining buffer, and thus not combined with younger stores (including direct-stores) to the same address. Older WC and non-temporal stores held in the write-combing buffer may be combined with younger direct stores to the same address. Because WC protocol used by direct-stores follows a weakly-ordered memory consistency model, a fencing operation using SFENCE or MFENCE should follow the MOVDIRI instruction to enforce ordering when needed.
Direct-stores issued by MOVDIRI to a destination aligned to a 4-byte boundary (8-byte boundary if used with REX.W prefix) guarantee 4-byte (8-byte with REX.W prefix) write-completion atomicity. This means that the data arrives at the destination in a single undivided 4-byte (or 8-byte) write transaction. If the destination is not aligned for the write size, the direct-stores issued by MOVDIRI are split and arrive at the destination in two parts. Each part of such split direct-store will not merge with younger stores but can arrive at the destination in either order. Availability of the MOVDIRI instruction is indicated by the presence of the CPUID feature flag MOVDIRI (bit 27 of the ECX register in leaf 07H, see \u201cCPUID\u2014CPU Identification\u201d in the Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 2A).
", + "html": "Moves the doubleword integer in the source operand (second operand) to the destination operand (first operand) using a direct-store operation. The source operand is a general purpose register. The destination operand is a 32-bit memory location. In 64-bit mode, the instruction\u2019s default operation size is 32 bits. Use of the REX.R prefix permits access to additional registers (R8-R15). Use of the REX.W prefix promotes operation to 64 bits. See summary chart at the beginning of this section for encoding data and limits.
The direct-store is implemented by using write combining (WC) memory type protocol for writing data. Using this protocol, the processor does not write the data into the cache hierarchy, nor does it fetch the corresponding cache line from memory into the cache hierarchy. If the destination address is cached, the line is written-back (if modified) and invalidated from the cache, before the direct-store. Unlike stores with non-temporal hint that allow uncached (UC) and write-protected (WP) memory-type for the destination to override the non-temporal hint, direct-stores always follow WC memory type protocol irrespective of the destination address memory type (including UC and WP types).
Unlike WC stores and stores with non-temporal hint, direct-stores are eligible for immediate eviction from the write-combining buffer, and thus not combined with younger stores (including direct-stores) to the same address. Older WC and non-temporal stores held in the write-combing buffer may be combined with younger direct stores to the same address. Direct stores are weakly ordered relative to other stores. Software that desires stronger ordering should use a fencing instruction (MFENCE or SFENCE) before or after a direct store to enforce the ordering desired.
Direct-stores issued by MOVDIRI to a destination aligned to a 4-byte boundary (8-byte boundary if used with REX.W prefix) guarantee 4-byte (8-byte with REX.W prefix) write-completion atomicity. This means that the data arrives at the destination in a single undivided 4-byte (or 8-byte) write transaction. If the destination is not aligned for the write size, the direct-stores issued by MOVDIRI are split and arrive at the destination in two parts. Each part of such split direct-store will not merge with younger stores but can arrive at the destination in either order. Availability of the MOVDIRI instruction is indicated by the presence of the CPUID feature flag MOVDIRI (bit 27 of the ECX register in leaf 07H, see \u201cCPUID\u2014CPU Identification\u201d in the Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 2A).
", "tooltip": "Moves the doubleword integer in the source operand (second operand) to the destination operand (first operand) using a direct-store operation. The source operand is a general purpose register. The destination operand is a 32-bit memory location. In 64-bit mode, the instruction\u2019s default operation size is 32 bits. Use of the REX.R prefix permits access to additional registers (R8-R15). Use of the REX.W prefix promotes operation to 64 bits. See summary chart at the beginning of this section for encoding data and limits.", "url": "http://www.felixcloutier.com/x86/MOVDIRI.html" }; @@ -1926,7 +1858,7 @@ export function getAsmOpcode(opcode) { case "MOVNTDQA": case "VMOVNTDQA": return { - "html": "MOVNTDQA loads a double quadword from the source operand (second operand) to the destination operand (first operand) using a non-temporal hint if the memory source is WC (write combining) memory type. For WC memory type, the nontemporal hint may be implemented by loading a temporary internal buffer with the equivalent of an aligned cache line without filling this data to the cache. Any memory-type aliased lines in the cache will be snooped and flushed. Subsequent MOVNTDQA reads to unread portions of the WC cache line will receive data from the temporary internal buffer if data is available. The temporary internal buffer may be flushed by the processor at any time for any reason, for example:
a mis-speculation condition, and various fault conditions
The non-temporal hint is implemented by using a write combining (WC) memory type protocol when reading the data from memory. Using this protocol, the processor
does not read the data into the cache hierarchy, nor does it fetch the corresponding cache line from memory into the cache hierarchy. The memory type of the region being read can override the non-temporal hint, if the memory address specified for the non-temporal read is not a WC memory region. Information on non-temporal reads and writes can be found in \u201cCaching of Temporal vs. Non-Temporal Data\u201d in Chapter 10 in the Intel\u00ae 64 and IA-32 Architecture Software Developer\u2019s Manual, Volume 3A.
Because the WC protocol uses a weakly-ordered memory consistency model, a fencing operation implemented with a MFENCE instruction should be used in conjunction with MOVNTDQA instructions if multiple processors might use different memory types for the referenced memory locations or to synchronize reads of a processor with writes by other agents in the system. A processor\u2019s implementation of the streaming load hint does not override the effective memory type, but the implementation of the hint is processor dependent. For example, a processor implementa-
", + "html": "MOVNTDQA loads a double quadword from the source operand (second operand) to the destination operand (first operand) using a non-temporal hint if the memory source is WC (write combining) memory type. For WC memory type, the nontemporal hint may be implemented by loading a temporary internal buffer with the equivalent of an aligned cache line without filling this data to the cache. Any memory-type aliased lines in the cache will be snooped and flushed. Subsequent MOVNTDQA reads to unread portions of the WC cache line will receive data from the temporary internal buffer if data is available. The temporary internal buffer may be flushed by the processor at any time for any reason, for example:
a mis-speculation condition, and various fault conditions
The non-temporal hint is implemented by using a write combining (WC) memory type protocol when reading the data from memory. Using this protocol, the processor does not read the data into the cache hierarchy, nor does it fetch the corresponding cache line from memory into the cache hierarchy. The memory type of the region being read can override the non-temporal hint, if the memory address specified for the non-temporal read is not a WC memory region. Information on non-temporal reads and writes can be found in \u201cCaching of Temporal vs. NonTemporal Data\u201d in Chapter 10 in the Intel\u00ae 64 and IA-32 Architecture Software Developer\u2019s Manual, Volume 3A.
Because the WC protocol uses a weakly-ordered memory consistency model, a fencing operation implemented with a MFENCE instruction should be used in conjunction with MOVNTDQA instructions if multiple processors might use different memory types for the referenced memory locations or to synchronize reads of a processor with writes by other agents in the system. A processor\u2019s implementation of the streaming load hint does not override the effective memory type, but the implementation of the hint is processor dependent. For example, a processor implementation may choose to ignore the hint and process the instruction as a normal MOVDQA for any memory type. Alter-
natively, another implementation may optimize cache reads generated by MOVNTDQA on WB memory type to reduce cache evictions.
", "tooltip": "MOVNTDQA loads a double quadword from the source operand (second operand) to the destination operand (first operand) using a non-temporal hint if the memory source is WC (write combining) memory type. For WC memory type, the nontemporal hint may be implemented by loading a temporary internal buffer with the equivalent of an aligned cache line without filling this data to the cache. Any memory-type aliased lines in the cache will be snooped and flushed. Subsequent MOVNTDQA reads to unread portions of the WC cache line will receive data from the temporary internal buffer if data is available. The temporary internal buffer may be flushed by the processor at any time for any reason, for example", "url": "http://www.felixcloutier.com/x86/MOVNTDQA.html" }; @@ -2141,7 +2073,7 @@ export function getAsmOpcode(opcode) { case "OUT": return { - "html": "Copies the value from the second operand (source operand) to the I/O port specified with the destination operand (first operand). The source operand can be register AL, AX, or EAX, depending on the size of the port being accessed (8, 16, or 32 bits, respectively); the destination operand can be a byte-immediate or the DX register. Using a byte immediate allows I/O port addresses 0 to 255 to be accessed; using the DX register as a source operand allows I/O ports from 0 to 65,535 to be accessed.
The size of the I/O port being accessed is determined by the opcode for an 8-bit I/O port or by the operand-size attribute of the instruction for a 16- or 32-bit I/O port.
At the machine code level, I/O instructions are shorter when accessing 8-bit I/O ports. Here, the upper eight bits of the port address will be 0.
This instruction is only useful for accessing I/O ports located in the processor\u2019s I/O address space. See Chapter 18, \u201cInput/Output,\u201d in the Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 1, for more information on accessing I/O ports in the I/O address space.
This instruction\u2019s operation is the same in non-64-bit modes and 64-bit mode.
", + "html": "Copies the value from the second operand (source operand) to the I/O port specified with the destination operand (first operand). The source operand can be register AL, AX, or EAX, depending on the size of the port being accessed (8, 16, or 32 bits, respectively); the destination operand can be a byte-immediate or the DX register. Using a byte immediate allows I/O port addresses 0 to 255 to be accessed; using the DX register as a source operand allows I/O ports from 0 to 65,535 to be accessed.
The size of the I/O port being accessed is determined by the opcode for an 8-bit I/O port or by the operand-size attribute of the instruction for a 16- or 32-bit I/O port.
At the machine code level, I/O instructions are shorter when accessing 8-bit I/O ports. Here, the upper eight bits of the port address will be 0.
This instruction is only useful for accessing I/O ports located in the processor\u2019s I/O address space. See Chapter 19, \u201cInput/Output,\u201d in the Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 1, for more information on accessing I/O ports in the I/O address space.
This instruction\u2019s operation is the same in non-64-bit modes and 64-bit mode.
", "tooltip": "Copies the value from the second operand (source operand) to the I/O port specified with the destination operand (first operand). The source operand can be register AL, AX, or EAX, depending on the size of the port being accessed (8, 16, or 32 bits, respectively); the destination operand can be a byte-immediate or the DX register. Using a byte immediate allows I/O port addresses 0 to 255 to be accessed; using the DX register as a source operand allows I/O ports from 0 to 65,535 to be accessed.", "url": "http://www.felixcloutier.com/x86/OUT.html" }; @@ -2151,7 +2083,7 @@ export function getAsmOpcode(opcode) { case "OUTSD": case "OUTSW": return { - "html": "Copies data from the source operand (second operand) to the I/O port specified with the destination operand (first operand). The source operand is a memory location, the address of which is read from either the DS:SI, DS:ESI or the RSI registers (depending on the address-size attribute of the instruction, 16, 32 or 64, respectively). (The DS segment may be overridden with a segment override prefix.) The destination operand is an I/O port address (from 0 to 65,535) that is read from the DX register. The size of the I/O port being accessed (that is, the size of the source and destination operands) is determined by the opcode for an 8-bit I/O port or by the operand-size attribute of the instruction for a 16- or 32-bit I/O port.
At the assembly-code level, two forms of this instruction are allowed: the \u201cexplicit-operands\u201d form and the \u201cno-operands\u201d form. The explicit-operands form (specified with the OUTS mnemonic) allows the source and destination operands to be specified explicitly. Here, the source operand should be a symbol that indicates the size of the I/O port and the source address, and the destination operand must be DX. This explicit-operands form is provided to allow documentation; however, note that the documentation provided by this form can be misleading. That is, the source operand symbol must specify the correct type (size) of the operand (byte, word, or doubleword), but it does not have to specify the correct location. The location is always specified by the DS:(E)SI or RSI registers, which must be loaded correctly before the OUTS instruction is executed.
The no-operands form provides \u201cshort forms\u201d of the byte, word, and doubleword versions of the OUTS instructions. Here also DS:(E)SI is assumed to be the source operand and DX is assumed to be the destination operand. The size of the I/O port is specified with the choice of mnemonic: OUTSB (byte), OUTSW (word), or OUTSD (doubleword).
After the byte, word, or doubleword is transferred from the memory location to the I/O port, the SI/ESI/RSI register is incremented or decremented automatically according to the setting of the DF flag in the EFLAGS register. (If the DF flag is 0, the (E)SI register is incremented; if the DF flag is 1, the SI/ESI/RSI register is decremented.) The SI/ESI/RSI register is incremented or decremented by 1 for byte operations, by 2 for word operations, and by 4 for doubleword operations.
The OUTS, OUTSB, OUTSW, and OUTSD instructions can be preceded by the REP prefix for block input of ECX bytes, words, or doublewords. See \u201cREP/REPE/REPZ /REPNE/REPNZ\u2014Repeat String Operation Prefix\u201d in this chapter for a description of the REP prefix. This instruction is only useful for accessing I/O ports located in the processor\u2019s I/O address space. See Chapter 18, \u201cInput/Output,\u201d in the Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 1, for more information on accessing I/O ports in the I/O address space.
", + "html": "Copies data from the source operand (second operand) to the I/O port specified with the destination operand (first operand). The source operand is a memory location, the address of which is read from either the DS:SI, DS:ESI or the RSI registers (depending on the address-size attribute of the instruction, 16, 32 or 64, respectively). (The DS segment may be overridden with a segment override prefix.) The destination operand is an I/O port address (from 0 to 65,535) that is read from the DX register. The size of the I/O port being accessed (that is, the size of the source and destination operands) is determined by the opcode for an 8-bit I/O port or by the operand-size attribute of the instruction for a 16- or 32-bit I/O port.
At the assembly-code level, two forms of this instruction are allowed: the \u201cexplicit-operands\u201d form and the \u201cno-operands\u201d form. The explicit-operands form (specified with the OUTS mnemonic) allows the source and destination operands to be specified explicitly. Here, the source operand should be a symbol that indicates the size of the I/O port and the source address, and the destination operand must be DX. This explicit-operands form is provided to allow documentation; however, note that the documentation provided by this form can be misleading. That is, the source operand symbol must specify the correct type (size) of the operand (byte, word, or doubleword), but it does not have to specify the correct location. The location is always specified by the DS:(E)SI or RSI registers, which must be loaded correctly before the OUTS instruction is executed.
The no-operands form provides \u201cshort forms\u201d of the byte, word, and doubleword versions of the OUTS instructions. Here also DS:(E)SI is assumed to be the source operand and DX is assumed to be the destination operand. The size of the I/O port is specified with the choice of mnemonic: OUTSB (byte), OUTSW (word), or OUTSD (doubleword).
After the byte, word, or doubleword is transferred from the memory location to the I/O port, the SI/ESI/RSI register is incremented or decremented automatically according to the setting of the DF flag in the EFLAGS register. (If the DF flag is 0, the (E)SI register is incremented; if the DF flag is 1, the SI/ESI/RSI register is decremented.) The SI/ESI/RSI register is incremented or decremented by 1 for byte operations, by 2 for word operations, and by 4 for doubleword operations.
The OUTS, OUTSB, OUTSW, and OUTSD instructions can be preceded by the REP prefix for block input of ECX bytes, words, or doublewords. See \u201cREP/REPE/REPZ /REPNE/REPNZ\u2014Repeat String Operation Prefix\u201d in this chapter for a description of the REP prefix. This instruction is only useful for accessing I/O ports located in the processor\u2019s I/O address space. See Chapter 19, \u201cInput/Output,\u201d in the Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 1, for more information on accessing I/O ports in the I/O address space.
", "tooltip": "Copies data from the source operand (second operand) to the I/O port specified with the destination operand (first operand). The source operand is a memory location, the address of which is read from either the DS:SI, DS:ESI or the RSI registers (depending on the address-size attribute of the instruction, 16, 32 or 64, respectively). (The DS segment may be overridden with a segment override prefix.) The destination operand is an I/O port address (from 0 to 65,535) that is read from the DX register. The size of the I/O port being accessed (that is, the size of the source and destination operands) is determined by the opcode for an 8-bit I/O port or by the operand-size attribute of the instruction for a 16- or 32-bit I/O port.", "url": "http://www.felixcloutier.com/x86/OUTS%3AOUTSB%3AOUTSW%3AOUTSD.html" }; @@ -2232,7 +2164,7 @@ export function getAsmOpcode(opcode) { case "PALIGNR": case "VPALIGNR": return { - "html": "(V)PALIGNR concatenates the destination operand (the first operand) and the source operand (the second operand) into an intermediate composite, shifts the composite at byte granularity to the right by a constant immediate, and extracts the right-aligned result into the destination. The first and the second operands can be an MMX,
XMM or a YMM register. The immediate value is considered unsigned. Immediate shift counts larger than the 2L (i.e. 32 for 128-bit operands, or 16 for 64-bit operands) produce a zero result. Both operands can be MMX registers, XMM registers or YMM registers. When the source operand is a 128-bit memory operand, the operand must be aligned on a 16-byte boundary or a general-protection exception (#GP) will be generated.
In 64-bit mode and not encoded by VEX/EVEX prefix, use the REX prefix to access additional registers.
128-bit Legacy SSE version: Bits (MAXVL-1:128) of the corresponding YMM destination register remain unchanged.
EVEX.512 encoded version: The first source operand is a ZMM register and contains four 16-byte blocks. The second source operand is a ZMM register or a 512-bit memory location containing four 16-byte block. The destination operand is a ZMM register and contain four 16-byte results. The imm8[7:0] is the common shift count
", + "html": "(V)PALIGNR concatenates the destination operand (the first operand) and the source operand (the second operand) into an intermediate composite, shifts the composite at byte granularity to the right by a constant immediate, and extracts the right-aligned result into the destination. The first and the second operands can be an MMX,
XMM or a YMM register. The immediate value is considered unsigned. Immediate shift counts larger than the 2L (i.e., 32 for 128-bit operands, or 16 for 64-bit operands) produce a zero result. Both operands can be MMX registers, XMM registers or YMM registers. When the source operand is a 128-bit memory operand, the operand must be aligned on a 16-byte boundary or a general-protection exception (#GP) will be generated.
In 64-bit mode and not encoded by VEX/EVEX prefix, use the REX prefix to access additional registers.
128-bit Legacy SSE version: Bits (MAXVL-1:128) of the corresponding YMM destination register remain unchanged.
EVEX.512 encoded version: The first source operand is a ZMM register and contains four 16-byte blocks. The second source operand is a ZMM register or a 512-bit memory location containing four 16-byte block. The destination operand is a ZMM register and contain four 16-byte results. The imm8[7:0] is the common shift count
", "tooltip": "(V)PALIGNR concatenates the destination operand (the first operand) and the source operand (the second operand) into an intermediate composite, shifts the composite at byte granularity to the right by a constant immediate, and extracts the right-aligned result into the destination. The first and the second operands can be an MMX", "url": "http://www.felixcloutier.com/x86/PALIGNR.html" }; @@ -2293,7 +2225,7 @@ export function getAsmOpcode(opcode) { case "PCLMULQDQ": case "VPCLMULQDQ": return { - "html": "Performs a carry-less multiplication of two quadwords, selected from the first source and second source operand according to the value of the immediate byte. Bits 4 and 0 are used to select which 64-bit half of each operand to use according to Table 4-13, other bits of the immediate byte are ignored.
The first source operand and the destination operand are the same and must be an XMM register. The second source operand can be an XMM register or a 128-bit memory location. Bits (MAXVL-1:128) of the corresponding YMM destination register remain unchanged.
Compilers and assemblers may implement the following pseudo-op syntax to simply programming and emit the required encoding for Imm8.
", + "html": "Performs a carry-less multiplication of two quadwords, selected from the first source and second source operand according to the value of the immediate byte. Bits 4 and 0 are used to select which 64-bit half of each operand to use according to Table 4-13, other bits of the immediate byte are ignored.
The EVEX encoded form of this instruction does not support memory fault suppression.
The first source operand and the destination operand are the same and must be a ZMM/YMM/XMM register. The second source operand can be a ZMM/YMM/XMM register or a 512/256/128-bit memory location. Bits (VL_MAX-1:128) of the corresponding YMM destination register remain unchanged.
Compilers and assemblers may implement the following pseudo-op syntax to simplify programming and emit the required encoding for imm8.
", "tooltip": "Performs a carry-less multiplication of two quadwords, selected from the first source and second source operand according to the value of the immediate byte. Bits 4 and 0 are used to select which 64-bit half of each operand to use according to Table 4-13, other bits of the immediate byte are ignored.", "url": "http://www.felixcloutier.com/x86/PCLMULQDQ.html" }; @@ -2366,6 +2298,13 @@ export function getAsmOpcode(opcode) { "url": "http://www.felixcloutier.com/x86/PCMPISTRM.html" }; + case "PCONFIG": + return { + "html": "PCONFIG allows software to configure certain platform features. PCONFIG supports multiple leaf functions, with a leaf function identified by the value in EAX. The registers RBX, RCX, and RDX may provide input or output information for certain leaves. All leaves write status information to EAX but do not modify RBX, RCX, or RDX unless they are being used as leaf-specific output.
Each PCONFIG leaf function applies to a specific hardware block called a PCONFIG target, and each PCONFIG target is associated with a numerical target identifier. Supported target identifiers are enumerated, along with other PCONFIG capabilities, in the sub-leaves of the PCONFIG-information leaf of CPUID (EAX = 1BH). An attempt to execute an undefined leaf function, or a leaf function that applies to an unsupported target identifier, results in a general-protection exception (#GP). (In the future, the PCONFIG-information leaf of CPUID may enumerate PCONFIG capabilities in addition to the supported target identifiers.)
Addresses and operands are 32 bits outside 64-bit mode and are 64 bits in 64-bit mode. The value of CS.D does not affect operand size or address size.
Table 4-15 shows the leaf encodings for PCONFIG, and Table 4-16 shows the leaf register usage for PCONFIG.
The MKTME_KEY_PROGRAM leaf of PCONFIG pertains to the MKTME1 target, which has target identifier 1. It is used by software to manage the key associated with a KeyID. The leaf function is invoked by setting the leaf value of 0 in EAX and the address of MKTME_KEY_PROGRAM_STRUCT in RBX. Successful execution of the leaf clears RAX (set to zero) and ZF, CF, PF, AF, OF, and SF are cleared. In case of failure, the failure reason is indicated in RAX with ZF set to 1 and CF, PF, AF, OF, and SF are cleared. The MKTME_KEY_PROGRAM leaf uses the MKTME_KEY_PROGRAM_STRUCT in memory shown in Table 4-17.
", + "tooltip": "PCONFIG allows software to configure certain platform features. PCONFIG supports multiple leaf functions, with a leaf function identified by the value in EAX. The registers RBX, RCX, and RDX may provide input or output information for certain leaves. All leaves write status information to EAX but do not modify RBX, RCX, or RDX unless they are being used as leaf-specific output.", + "url": "http://www.felixcloutier.com/x86/PCONFIG.html" + }; + case "PDEP": return { "html": "PDEP uses a mask in the second source operand (the third operand) to transfer/scatter contiguous low order bits in the first source operand (the second operand) into the destination (the first operand). PDEP takes the low bits from the first source operand and deposit them in the destination operand at the corresponding bit locations that are set in the second source operand (mask). All other bits (bits not set in mask) in destination are set to zero.
This instruction is not supported in real mode and virtual-8086 mode. The operand size is always 32 bits if not in 64-bit mode. In 64-bit mode operand size 64 requires VEX.W1. VEX.W1 is ignored in non-64-bit modes. An attempt to execute this instruction with VEX.L not equal to 0 will cause #UD.
", @@ -2459,8 +2398,8 @@ export function getAsmOpcode(opcode) { case "PINSRW": case "VPINSRW": return { - "html": "Copies a word from the source operand (second operand) and inserts it in the destination operand (first operand) at the location specified with the count operand (third operand). (The other words in the destination register are left untouched.) The source operand can be a general-purpose register or a 16-bit memory location. (When the source operand is a general-purpose register, the low word of the register is copied.) The destination operand can be an MMX technology register or an XMM register. The count operand is an 8-bit immediate. When specifying a word location in an MMX technology register, the 2 least-significant bits of the count operand specify the location; for an XMM register, the 3 least-significant bits specify the location.
In 64-bit mode and not encoded with VEX/EVEX, using a REX prefix in the form of REX.R permits this instruction to access additional registers (XMM8-XMM15, R8-15).
128-bit Legacy SSE version: Bits (MAXVL-1:128) of the corresponding YMM destination register remain unchanged.
VEX.128 encoded version: Bits (MAXVL-1:128) of the destination YMM register are zeroed. VEX.L must be 0, otherwise the instruction will #UD.
EVEX.128 encoded version: Bits (MAXVL-1:128) of the destination register are zeroed. EVEX.L\u2019L must be 0, otherwise the instruction will #UD.
", - "tooltip": "Copies a word from the source operand (second operand) and inserts it in the destination operand (first operand) at the location specified with the count operand (third operand). (The other words in the destination register are left untouched.) The source operand can be a general-purpose register or a 16-bit memory location. (When the source operand is a general-purpose register, the low word of the register is copied.) The destination operand can be an MMX technology register or an XMM register. The count operand is an 8-bit immediate. When specifying a word location in an MMX technology register, the 2 least-significant bits of the count operand specify the location; for an XMM register, the 3 least-significant bits specify the location.", + "html": "Three operand MMX and SSE instructions:
Copies a word from the source operand and inserts it in the destination operand at the location specified with the count operand. (The other words in the destination register are left untouched.) The source operand can be a general-purpose register or a 16-bit memory location. (When the source operand is a general-purpose register, the low word of the register is copied.) The destination operand can be an MMX technology register or an XMM register. The count operand is an 8-bit immediate. When specifying a word location in an MMX technology register, the 2 least-significant bits of the count operand specify the location; for an XMM register, the 3 least-significant bits specify the location.
Bits (MAXVL-1:128) of the corresponding YMM destination register remain unchanged.
Four operand AVX and AVX-512 instructions:
Combines a word from the first source operand with the second source operand, and inserts it in the destination operand at the location specified with the count operand. The second source operand can be a general-purpose register or a 16-bit memory location. (When the source operand is a general-purpose register, the low word of the register is copied.) The first source and destination operands are XMM registers. The count operand is an 8-bit immediate. When specifying a word location, the 3 least-significant bits specify the location.
", + "tooltip": "Three operand MMX and SSE instructions", "url": "http://www.felixcloutier.com/x86/PINSRW.html" }; @@ -2597,7 +2536,7 @@ export function getAsmOpcode(opcode) { case "PMULDQ": case "VPMULDQ": return { - "html": "Multiplies packed signed doubleword integers in the even-numbered (zero-based reference) elements of the first source operand with the packed signed doubleword integers in the corresponding elements of the second source operand and stores packed signed quadword results in the destination operand.
128-bit Legacy SSE version: The input signed doubleword integers are taken from the even-numbered elements of the source operands, i.e. the first (low) and third doubleword element. For 128-bit memory operands, 128 bits are fetched from memory, but only the first and third doublewords are used in the computation. The first source operand and the destination XMM operand is the same. The second source operand can be an XMM register or 128-bit memory location. Bits (MAXVL-1:128) of the corresponding destination register remain unchanged.
VEX.128 encoded version: The input signed doubleword integers are taken from the even-numbered elements of the source operands, i.e., the first (low) and third doubleword element. For 128-bit memory operands, 128 bits are fetched from memory, but only the first and third doublewords are used in the computation.The first source operand and the destination operand are XMM registers. The second source operand can be an XMM register or 128-bit memory location. Bits (MAXVL-1:128) of the corresponding destination register are zeroed.
VEX.256 encoded version: The input signed doubleword integers are taken from the even-numbered elements of the source operands, i.e. the first, 3rd, 5th, 7th doubleword element. For 256-bit memory operands, 256 bits are fetched from memory, but only the four even-numbered doublewords are used in the computation. The first source operand and the destination operand are YMM registers. The second source operand can be a YMM register or 256-bit memory location. Bits (MAXVL-1:256) of the corresponding destination ZMM register are zeroed.
EVEX encoded version: The input signed doubleword integers are taken from the even-numbered elements of the source operands. The first source operand is a ZMM/YMM/XMM registers. The second source operand can be an ZMM/YMM/XMM register, a 512/256/128-bit memory location or a 512/256/128-bit vector broadcasted from a 64-bit memory location. The destination is a ZMM/YMM/XMM register, and updated according to the writemask at 64-bit granularity.
", + "html": "Multiplies packed signed doubleword integers in the even-numbered (zero-based reference) elements of the first source operand with the packed signed doubleword integers in the corresponding elements of the second source operand and stores packed signed quadword results in the destination operand.
128-bit Legacy SSE version: The input signed doubleword integers are taken from the even-numbered elements of the source operands, i.e., the first (low) and third doubleword element. For 128-bit memory operands, 128 bits are fetched from memory, but only the first and third doublewords are used in the computation. The first source operand and the destination XMM operand is the same. The second source operand can be an XMM register or 128-bit memory location. Bits (MAXVL-1:128) of the corresponding destination register remain unchanged.
VEX.128 encoded version: The input signed doubleword integers are taken from the even-numbered elements of the source operands, i.e., the first (low) and third doubleword element. For 128-bit memory operands, 128 bits are fetched from memory, but only the first and third doublewords are used in the computation.The first source operand and the destination operand are XMM registers. The second source operand can be an XMM register or 128-bit memory location. Bits (MAXVL-1:128) of the corresponding destination register are zeroed.
VEX.256 encoded version: The input signed doubleword integers are taken from the even-numbered elements of the source operands, i.e., the first, 3rd, 5th, 7th doubleword element. For 256-bit memory operands, 256 bits are fetched from memory, but only the four even-numbered doublewords are used in the computation. The first source operand and the destination operand are YMM registers. The second source operand can be a YMM register or 256-bit memory location. Bits (MAXVL-1:256) of the corresponding destination ZMM register are zeroed.
EVEX encoded version: The input signed doubleword integers are taken from the even-numbered elements of the source operands. The first source operand is a ZMM/YMM/XMM registers. The second source operand can be an ZMM/YMM/XMM register, a 512/256/128-bit memory location or a 512/256/128-bit vector broadcasted from a 64-bit memory location. The destination is a ZMM/YMM/XMM register, and updated according to the writemask at 64-bit granularity.
", "tooltip": "Multiplies packed signed doubleword integers in the even-numbered (zero-based reference) elements of the first source operand with the packed signed doubleword integers in the corresponding elements of the second source operand and stores packed signed quadword results in the destination operand.", "url": "http://www.felixcloutier.com/x86/PMULDQ.html" }; @@ -2677,7 +2616,7 @@ export function getAsmOpcode(opcode) { case "POPFD": case "POPFQ": return { - "html": "Pops a doubleword (POPFD) from the top of the stack (if the current operand-size attribute is 32) and stores the value in the EFLAGS register, or pops a word from the top of the stack (if the operand-size attribute is 16) and stores it in the lower 16 bits of the EFLAGS register (that is, the FLAGS register). These instructions reverse the operation of the PUSHF/PUSHFD/PUSHFQ instructions.
The POPF (pop flags) and POPFD (pop flags double) mnemonics reference the same opcode. The POPF instruction is intended for use when the operand-size attribute is 16; the POPFD instruction is intended for use when the operand-size attribute is 32. Some assemblers may force the operand size to 16 for POPF and to 32 for POPFD. Others may treat the mnemonics as synonyms (POPF/POPFD) and use the setting of the operand-size attribute to determine the size of values to pop from the stack.
The effect of POPF/POPFD on the EFLAGS register changes, depending on the mode of operation. See Table 4-15 and the key below for details.
When operating in protected, compatibility, or 64-bit mode at privilege level 0 (or in real-address mode, the equivalent to privilege level 0), all non-reserved flags in the EFLAGS register except RF1, VIP, VIF, and VM may be modified. VIP, VIF and VM remain unaffected.
When operating in protected, compatibility, or 64-bit mode with a privilege level greater than 0, but less than or equal to IOPL, all flags can be modified except the IOPL field and RF, IF, VIP, VIF, and VM; these remain unaffected. The AC and ID flags can only be modified if the operand-size attribute is 32. The interrupt flag (IF) is altered only when executing at a level at least as privileged as the IOPL. If a POPF/POPFD instruction is executed with insufficient privilege, an exception does not occur but privileged bits do not change.
", + "html": "Pops a doubleword (POPFD) from the top of the stack (if the current operand-size attribute is 32) and stores the value in the EFLAGS register, or pops a word from the top of the stack (if the operand-size attribute is 16) and stores it in the lower 16 bits of the EFLAGS register (that is, the FLAGS register). These instructions reverse the operation of the PUSHF/PUSHFD/PUSHFQ instructions.
The POPF (pop flags) and POPFD (pop flags double) mnemonics reference the same opcode. The POPF instruction is intended for use when the operand-size attribute is 16; the POPFD instruction is intended for use when the operand-size attribute is 32. Some assemblers may force the operand size to 16 for POPF and to 32 for POPFD. Others may treat the mnemonics as synonyms (POPF/POPFD) and use the setting of the operand-size attribute to determine the size of values to pop from the stack.
The effect of POPF/POPFD on the EFLAGS register changes, depending on the mode of operation. See Table 4-21 and the key below for details.
When operating in protected, compatibility, or 64-bit mode at privilege level 0 (or in real-address mode, the equivalent to privilege level 0), all non-reserved flags in the EFLAGS register except RF1, VIP, VIF, and VM may be modified. VIP, VIF and VM remain unaffected.
When operating in protected, compatibility, or 64-bit mode with a privilege level greater than 0, but less than or equal to IOPL, all flags can be modified except the IOPL field and RF, IF, VIP, VIF, and VM; these remain unaffected. The AC and ID flags can only be modified if the operand-size attribute is 32. The interrupt flag (IF) is altered only when executing at a level at least as privileged as the IOPL. If a POPF/POPFD instruction is executed with insufficient privilege, an exception does not occur but privileged bits do not change.
", "tooltip": "Pops a doubleword (POPFD) from the top of the stack (if the current operand-size attribute is 32) and stores the value in the EFLAGS register, or pops a word from the top of the stack (if the operand-size attribute is 16) and stores it in the lower 16 bits of the EFLAGS register (that is, the FLAGS register). These instructions reverse the operation of the PUSHF/PUSHFD/PUSHFQ instructions.", "url": "http://www.felixcloutier.com/x86/POPF%3APOPFD%3APOPFQ.html" }; @@ -2701,7 +2640,7 @@ export function getAsmOpcode(opcode) { case "PREFETCHWT1": return { - "html": "Fetches the line of data from memory that contains the byte specified with the source operand to a location in the cache hierarchy specified by an intent to write hint (so that data is brought into \u2018Exclusive\u2019 state via a request for ownership) and a locality hint:
The source operand is a byte memory location. (The locality hints are encoded into the machine level instruction using bits 3 through 5 of the ModR/M byte. Use of any ModR/M value other than the specified ones will lead to unpredictable behavior.)
If the line selected is already present in the cache hierarchy at a level closer to the processor, no data movement occurs. Prefetches from uncacheable or WC memory are ignored.
The PREFETCHh instruction is merely a hint and does not affect program behavior. If executed, this instruction moves data closer to the processor in anticipation of future use.
The implementation of prefetch locality hints is implementation-dependent, and can be overloaded or ignored by a processor implementation. The amount of data prefetched is also processor implementation-dependent. It will, however, be a minimum of 32 bytes.
", + "html": "Fetches the line of data from memory that contains the byte specified with the source operand to a location in the cache hierarchy specified by an intent to write hint (so that data is brought into \u2018Exclusive\u2019 state via a request for ownership) and a locality hint:
The source operand is a byte memory location. (The locality hints are encoded into the machine level instruction using bits 3 through 5 of the ModR/M byte. Use of any ModR/M value other than the specified ones will lead to unpredictable behavior.)
If the line selected is already present in the cache hierarchy at a level closer to the processor, no data movement occurs. Prefetches from uncacheable or WC memory are ignored.
The PREFETCHWT1 instruction is merely a hint and does not affect program behavior. If executed, this instruction moves data closer to the processor in anticipation of future use.
The implementation of prefetch locality hints is implementation-dependent, and can be overloaded or ignored by a processor implementation. The amount of data prefetched is also processor implementation-dependent. It will, however, be a minimum of 32 bytes. Additional details of the implementation-dependent locality hints are described in Section 9.5, \u201cMemory Optimization Using Prefetch\u201d of the Intel\u00ae 64 and IA-32 Architectures Optimization Reference Manual.
", "tooltip": "Fetches the line of data from memory that contains the byte specified with the source operand to a location in the cache hierarchy specified by an intent to write hint (so that data is brought into \u2018Exclusive\u2019 state via a request for ownership) and a locality hint", "url": "http://www.felixcloutier.com/x86/PREFETCHWT1.html" }; @@ -2735,7 +2674,7 @@ export function getAsmOpcode(opcode) { case "PSHUFD": case "VPSHUFD": return { - "html": "Copies doublewords from source operand (second operand) and inserts them in the destination operand (first operand) at the locations selected with the order operand (third operand). Figure 4-16 shows the operation of the 256-bit VPSHUFD instruction and the encoding of the order operand. Each 2-bit field in the order operand selects the contents of one doubleword location within a 128-bit lane and copy to the target element in the destination operand. For example, bits 0 and 1 of the order operand targets the first doubleword element in the low and high 128-bit lane of the destination operand for 256-bit VPSHUFD. The encoded value of bits 1:0 of the order operand (see the field encoding in Figure 4-16) determines which doubleword element (from the respective 128-bit lane) of the source operand will be copied to doubleword 0 of the destination operand.
For 128-bit operation, only the low 128-bit lane are operative. The source operand can be an XMM register or a 128-bit memory location. The destination operand is an XMM register. The order operand is an 8-bit immediate. Note that this instruction permits a doubleword in the source operand to be copied to more than one doubleword location in the destination operand.
10B - X2 ORDER Operand 11B-X7 7 6 5 4 3 2 1 0 Operand 11B-X3
The source operand can be an XMM register or a 128-bit memory location. The destination operand is an XMM register. The order operand is an 8-bit immediate. Note that this instruction permits a doubleword in the source operand to be copied to more than one doubleword location in the destination operand.
In 64-bit mode and not encoded in VEX/EVEX, using REX.R permits this instruction to access XMM8-XMM15.
", + "html": "Copies doublewords from source operand (second operand) and inserts them in the destination operand (first operand) at the locations selected with the order operand (third operand). Figure 4-16 shows the operation of the 256-bit VPSHUFD instruction and the encoding of the order operand. Each 2-bit field in the order operand selects the contents of one doubleword location within a 128-bit lane and copy to the target element in the destination operand. For example, bits 0 and 1 of the order operand targets the first doubleword element in the low and high 128-bit lane of the destination operand for 256-bit VPSHUFD. The encoded value of bits 1:0 of the order operand (see the field encoding in Figure 4-16) determines which doubleword element (from the respective 128-bit lane) of the source operand will be copied to doubleword 0 of the destination operand.
For 128-bit operation, only the low 128-bit lane are operative. The source operand can be an XMM register or a 128-bit memory location. The destination operand is an XMM register. The order operand is an 8-bit immediate. Note that this instruction permits a doubleword in the source operand to be copied to more than one doubleword location in the destination operand.
10B - X2 ORDER 11B - X7 7 6 5 4 3 2 1 0 Operand 11B - X3 Operand
The source operand can be an XMM register or a 128-bit memory location. The destination operand is an XMM register. The order operand is an 8-bit immediate. Note that this instruction permits a doubleword in the source operand to be copied to more than one doubleword location in the destination operand.
In 64-bit mode and not encoded in VEX/EVEX, using REX.R permits this instruction to access XMM8-XMM15.
", "tooltip": "Copies doublewords from source operand (second operand) and inserts them in the destination operand (first operand) at the locations selected with the order operand (third operand). Figure 4-16 shows the operation of the 256-bit VPSHUFD instruction and the encoding of the order operand. Each 2-bit field in the order operand selects the contents of one doubleword location within a 128-bit lane and copy to the target element in the destination operand. For example, bits 0 and 1 of the order operand targets the first doubleword element in the low and high 128-bit lane of the destination operand for 256-bit VPSHUFD. The encoded value of bits 1:0 of the order operand (see the field encoding in Figure 4-16) determines which doubleword element (from the respective 128-bit lane) of the source operand will be copied to doubleword 0 of the destination operand.", "url": "http://www.felixcloutier.com/x86/PSHUFD.html" }; @@ -2770,7 +2709,7 @@ export function getAsmOpcode(opcode) { case "VPSIGND": case "VPSIGNW": return { - "html": "(V)PSIGNB/(V)PSIGNW/(V)PSIGND negates each data element of the destination operand (the first operand) if the signed integer value of the corresponding data element in the source operand (the second operand) is less than zero. If the signed integer value of a data element in the source operand is positive, the corresponding data element in the destination operand is unchanged. If a data element in the source operand is zero, the corresponding data element in the destination operand is set to zero.
(V)PSIGNB operates on signed bytes. (V)PSIGNW operates on 16-bit signed words. (V)PSIGND operates on signed 32-bit integers. When the source operand is a 128bit memory operand, the operand must be aligned on a 16-byte boundary or a general-protection exception (#GP) will be generated.
Legacy SSE instructions: Both operands can be MMX registers. In 64-bit mode, use the REX prefix to access additional registers.
128-bit Legacy SSE version: The first source and destination operands are XMM registers. The second source operand is an XMM register or a 128-bit memory location. Bits (MAXVL-1:128) of the corresponding YMM destination register remain unchanged.
VEX.128 encoded version: The first source and destination operands are XMM registers. The second source operand is an XMM register or a 128-bit memory location. Bits (MAXVL-1:128) of the destination YMM register are zeroed. VEX.L must be 0, otherwise instructions will #UD.
", + "html": "(V)PSIGNB/(V)PSIGNW/(V)PSIGND negates each data element of the destination operand (the first operand) if the signed integer value of the corresponding data element in the source operand (the second operand) is less than zero. If the signed integer value of a data element in the source operand is positive, the corresponding data element in the destination operand is unchanged. If a data element in the source operand is zero, the corresponding data element in the destination operand is set to zero.
(V)PSIGNB operates on signed bytes. (V)PSIGNW operates on 16-bit signed words. (V)PSIGND operates on signed 32-bit integers.
Legacy SSE instructions: Both operands can be MMX registers. In 64-bit mode, use the REX prefix to access additional registers.
128-bit Legacy SSE version: The first source and destination operands are XMM registers. The second source operand is an XMM register or a 128-bit memory location. Bits (MAXVL-1:128) of the corresponding YMM destination register remain unchanged.
VEX.128 encoded version: The first source and destination operands are XMM registers. The second source operand is an XMM register or a 128-bit memory location. Bits (MAXVL-1:128) of the destination YMM register are zeroed. VEX.L must be 0, otherwise instructions will #UD.
", "tooltip": "(V)PSIGNB/(V)PSIGNW/(V)PSIGND negates each data element of the destination operand (the first operand) if the signed integer value of the corresponding data element in the source operand (the second operand) is less than zero. If the signed integer value of a data element in the source operand is positive, the corresponding data element in the destination operand is unchanged. If a data element in the source operand is zero, the corresponding data element in the destination operand is set to zero.", "url": "http://www.felixcloutier.com/x86/PSIGNB%3APSIGNW%3APSIGND.html" }; @@ -2876,8 +2815,8 @@ export function getAsmOpcode(opcode) { case "PTWRITE": return { - "html": "This instruction reads data in the source operand and sends it to the Intel Processor Trace hardware to be encoded in a PTW packet if TriggerEn, ContextEn, FilterEn, and PTWEn are all set to 1. For more details on these values, see Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 3C, Section 35.2.2, \u201cSoftware Trace Instrumentation with PTWRITE\u201d. The size of data is 64-bit if using REX.W in 64-bit mode, otherwise 32-bits of data are copied from the source operand.
Note: The instruction will #UD if prefix 66H is used.
", - "tooltip": "This instruction reads data in the source operand and sends it to the Intel Processor Trace hardware to be encoded in a PTW packet if TriggerEn, ContextEn, FilterEn, and PTWEn are all set to 1. For more details on these values, see Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 3C, Section 35.2.2, \u201cSoftware Trace Instrumentation with PTWRITE\u201d. The size of data is 64-bit if using REX.W in 64-bit mode, otherwise 32-bits of data are copied from the source operand.", + "html": "This instruction reads data in the source operand and sends it to the Intel Processor Trace hardware to be encoded in a PTW packet if TriggerEn, ContextEn, FilterEn, and PTWEn are all set to 1. For more details on these values, see Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 3C, Section 32.2.2, \u201cSoftware Trace Instrumentation with PTWRITE\u201d. The size of data is 64-bit if using REX.W in 64-bit mode, otherwise 32-bits of data are copied from the source operand.
Note: The instruction will #UD if prefix 66H is used.
", + "tooltip": "This instruction reads data in the source operand and sends it to the Intel Processor Trace hardware to be encoded in a PTW packet if TriggerEn, ContextEn, FilterEn, and PTWEn are all set to 1. For more details on these values, see Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 3C, Section 32.2.2, \u201cSoftware Trace Instrumentation with PTWRITE\u201d. The size of data is 64-bit if using REX.W in 64-bit mode, otherwise 32-bits of data are copied from the source operand.", "url": "http://www.felixcloutier.com/x86/PTWRITE.html" }; @@ -2911,7 +2850,7 @@ export function getAsmOpcode(opcode) { case "PUSH": return { - "html": "Decrements the stack pointer and then stores the source operand on the top of the stack. Address and operand sizes are determined and used as follows:
The address size is used only when referencing a source operand in memory.
The operand size (16, 32, or 64 bits) determines the amount by which the stack pointer is decremented (2, 4 or 8).
If the source operand is an immediate of size less than the operand size, a sign-extended value is pushed on the stack. If the source operand is a segment register (16 bits) and the operand size is 64-bits, a zero-extended value is pushed on the stack; if the operand size is 32-bits, either a zero-extended value is pushed on the stack or the segment selector is written on the stack using a 16-bit move. For the last case, all recent Core and Atom processors perform a 16-bit move, leaving the upper portion of the stack location unmodified.
The PUSH ESP instruction pushes the value of the ESP register as it existed before the instruction was executed. If a PUSH instruction uses a memory operand in which the ESP register is used for computing the operand address, the address of the operand is computed before the ESP register is decremented.
", + "html": "Decrements the stack pointer and then stores the source operand on the top of the stack. Address and operand sizes are determined and used as follows:
The address size is used only when referencing a source operand in memory.
The operand size (16, 32, or 64 bits) determines the amount by which the stack pointer is decremented (2, 4 or 8).
If the source operand is an immediate of size less than the operand size, a sign-extended value is pushed on the stack. If the source operand is a segment register (16 bits) and the operand size is 64-bits, a zero-extended value is pushed on the stack; if the operand size is 32-bits, either a zero-extended value is pushed on the stack or the segment selector is written on the stack using a 16-bit move. For the last case, all recent Intel Core and Intel Atom processors perform a 16-bit move, leaving the upper portion of the stack location unmodified.
The stack-address size determines the width of the stack pointer when writing to the stack in memory and when decrementing the stack pointer. (As stated above, the amount by which the stack pointer is decremented is determined by the operand size.)
", "tooltip": "Decrements the stack pointer and then stores the source operand on the top of the stack. Address and operand sizes are determined and used as follows", "url": "http://www.felixcloutier.com/x86/PUSH.html" }; @@ -2993,8 +2932,8 @@ export function getAsmOpcode(opcode) { case "RDPMC": return { - "html": "The EAX register is loaded with the low-order 32 bits. The EDX register is loaded with the supported high-order bits of the counter. The number of high-order bits loaded into EDX is implementation specific on processors that do no support architectural performance monitoring. The width of fixed-function and general-purpose performance counters on processors supporting architectural performance monitoring are reported by CPUID 0AH leaf. See below for the treatment of the EDX register for \u201cfast\u201d reads.
The ECX register specifies the counter type (if the processor supports architectural performance monitoring) and counter index. Counter type is specified in ECX[30] to select one of two type of performance counters. If the processor does not support architectural performance monitoring, ECX[30:0] specifies the counter index; otherwise ECX[29:0] specifies the index relative to the base of each counter type. ECX[31] selects \u201cfast\u201d read mode if supported. The two counter types are:
The width of fixed-function performance counters and general-purpose performance counters on processors supporting architectural performance monitoring are reported by CPUID 0AH leaf. The width of general-purpose performance counters are 40-bits for processors that do not support architectural performance monitoring counters. The width of special-purpose performance counters are implementation specific.
When in protected or virtual 8086 mode, the performance-monitoring counters enabled (PCE) flag in register CR4 restricts the use of the RDPMC instruction as follows. When the PCE flag is set, the RDPMC instruction can be executed at any privilege level; when the flag is clear, the instruction can only be executed at privilege level 0. (When in real-address mode, the RDPMC instruction is always enabled.)
The performance-monitoring counters can also be read with the RDMSR instruction, when executing at privilege level 0.
", - "tooltip": "The EAX register is loaded with the low-order 32 bits. The EDX register is loaded with the supported high-order bits of the counter. The number of high-order bits loaded into EDX is implementation specific on processors that do no support architectural performance monitoring. The width of fixed-function and general-purpose performance counters on processors supporting architectural performance monitoring are reported by CPUID 0AH leaf. See below for the treatment of the EDX register for \u201cfast\u201d reads.", + "html": "Reads the contents of the performance monitoring counter (PMC) specified in ECX register into registers EDX:EAX. (On processors that support the Intel 64 architecture, the high-order 32 bits of RCX are ignored.) The EDX register is loaded with the high-order 32 bits of the PMC and the EAX register is loaded with the low-order 32 bits. (On processors that support the Intel 64 architecture, the high-order 32 bits of each of RAX and RDX are cleared.) If fewer than 64 bits are implemented in the PMC being read, unimplemented bits returned to EDX:EAX will have value zero.
The width of PMCs on processors supporting architectural performance monitoring (CPUID.0AH:EAX[7:0] =\u0338 0) are reported by CPUID.0AH:EAX[23:16]. On processors that do not support architectural performance monitoring (CPUID.0AH:EAX[7:0]=0), the width of general-purpose performance PMCs is 40 bits, while the widths of special-purpose PMCs are implementation specific.
Use of ECX to specify a PMC depends on whether the processor supports architectural performance monitoring:
Specifying an unsupported PMC encoding will cause a general protection exception #GP(0). For PMC details see Chapter 19, \u201cPerformance Monitoring\u201d, in the Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 3B.
When in protected or virtual 8086 mode, the Performance-monitoring Counters Enabled (PCE) flag in register CR4 restricts the use of the RDPMC instruction. When the PCE flag is set, the RDPMC instruction can be executed at any privilege level; when the flag is clear, the instruction can only be executed at privilege level 0. (When in real-address mode, the RDPMC instruction is always enabled.) The PMCs can also be read with the RDMSR instruction, when executing at privilege level 0.
", + "tooltip": "Reads the contents of the performance monitoring counter (PMC) specified in ECX register into registers EDX:EAX. (On processors that support the Intel 64 architecture, the high-order 32 bits of RCX are ignored.) The EDX register is loaded with the high-order 32 bits of the PMC and the EAX register is loaded with the low-order 32 bits. (On processors that support the Intel 64 architecture, the high-order 32 bits of each of RAX and RDX are cleared.) If fewer than 64 bits are implemented in the PMC being read, unimplemented bits returned to EDX:EAX will have value zero.", "url": "http://www.felixcloutier.com/x86/RDPMC.html" }; @@ -3016,7 +2955,7 @@ export function getAsmOpcode(opcode) { case "REPE": case "REPNE": return { - "html": "Repeats a string instruction the number of times specified in the count register or until the indicated condition of the ZF flag is no longer met. The REP (repeat), REPE (repeat while equal), REPNE (repeat while not equal), REPZ (repeat while zero), and REPNZ (repeat while not zero) mnemonics are prefixes that can be added to one of the string instructions. The REP prefix can be added to the INS, OUTS, MOVS, LODS, and STOS instructions, and the REPE, REPNE, REPZ, and REPNZ prefixes can be added to the CMPS and SCAS instructions. (The REPZ and REPNZ prefixes are synonymous forms of the REPE and REPNE prefixes, respectively.) The F3H prefix is defined for the following instructions and undefined for the rest:
The REP prefixes apply only to one string instruction at a time. To repeat a block of instructions, use the LOOP instruction or another looping construct. All of these repeat prefixes cause the associated instruction to be repeated until the count in register is decremented to 0. See Table 4-16.
The REPE, REPNE, REPZ, and REPNZ prefixes also check the state of the ZF flag after each iteration and terminate the repeat loop if the ZF flag is not in the specified state. When both termination conditions are tested, the cause of a repeat termination can be determined either by testing the count register with a JECXZ instruction or by testing the ZF flag (with a JZ, JNZ, or JNE instruction).
When the REPE/REPZ and REPNE/REPNZ prefixes are used, the ZF flag does not require initialization because both the CMPS and SCAS instructions affect the ZF flag according to the results of the comparisons they make.
A repeating string operation can be suspended by an exception or interrupt. When this happens, the state of the registers is preserved to allow the string operation to be resumed upon a return from the exception or interrupt handler. The source and destination registers point to the next string elements to be operated on, the EIP register points to the string instruction, and the ECX register has the value it held following the last successful iteration of the instruction. This mechanism allows long string operations to proceed without affecting the interrupt response time of the system.
", + "html": "Repeats a string instruction the number of times specified in the count register or until the indicated condition of the ZF flag is no longer met. The REP (repeat), REPE (repeat while equal), REPNE (repeat while not equal), REPZ (repeat while zero), and REPNZ (repeat while not zero) mnemonics are prefixes that can be added to one of the string instructions. The REP prefix can be added to the INS, OUTS, MOVS, LODS, and STOS instructions, and the REPE, REPNE, REPZ, and REPNZ prefixes can be added to the CMPS and SCAS instructions. (The REPZ and REPNZ prefixes are synonymous forms of the REPE and REPNE prefixes, respectively.) The F3H prefix is defined for the following instructions and undefined for the rest:
The REP prefixes apply only to one string instruction at a time. To repeat a block of instructions, use the LOOP instruction or another looping construct. All of these repeat prefixes cause the associated instruction to be repeated until the count in register is decremented to 0. See Table 4-22.
The REPE, REPNE, REPZ, and REPNZ prefixes also check the state of the ZF flag after each iteration and terminate the repeat loop if the ZF flag is not in the specified state. When both termination conditions are tested, the cause of a repeat termination can be determined either by testing the count register with a JECXZ instruction or by testing the ZF flag (with a JZ, JNZ, or JNE instruction).
When the REPE/REPZ and REPNE/REPNZ prefixes are used, the ZF flag does not require initialization because both the CMPS and SCAS instructions affect the ZF flag according to the results of the comparisons they make.
A repeating string operation can be suspended by an exception or interrupt. When this happens, the state of the registers is preserved to allow the string operation to be resumed upon a return from the exception or interrupt handler. The source and destination registers point to the next string elements to be operated on, the EIP register points to the string instruction, and the ECX register has the value it held following the last successful iteration of the instruction. This mechanism allows long string operations to proceed without affecting the interrupt response time of the system.
", "tooltip": "Repeats a string instruction the number of times specified in the count register or until the indicated condition of the ZF flag is no longer met. The REP (repeat), REPE (repeat while equal), REPNE (repeat while not equal), REPZ (repeat while zero), and REPNZ (repeat while not zero) mnemonics are prefixes that can be added to one of the string instructions. The REP prefix can be added to the INS, OUTS, MOVS, LODS, and STOS instructions, and the REPE, REPNE, REPZ, and REPNZ prefixes can be added to the CMPS and SCAS instructions. (The REPZ and REPNZ prefixes are synonymous forms of the REPE and REPNE prefixes, respectively.) The F3H prefix is defined for the following instructions and undefined for the rest", "url": "http://www.felixcloutier.com/x86/REP%3AREPE%3AREPZ%3AREPNE%3AREPNZ.html" }; @@ -3038,7 +2977,7 @@ export function getAsmOpcode(opcode) { case "ROUNDPD": case "VROUNDPD": return { - "html": "Round the 2 double-precision floating-point values in the source operand (second operand) using the rounding mode specified in the immediate operand (third operand) and place the results in the destination operand (first operand). The rounding process rounds each input floating-point value to an integer value and returns the integer result as a double-precision floating-point value.
The immediate operand specifies control fields for the rounding operation, three bit fields are defined and shown in Figure 4-24. Bit 3 of the immediate byte controls processor behavior for a precision exception, bit 2 selects the source of rounding mode control. Bits 1:0 specify a non-sticky rounding-mode value (Table 4-17 lists the encoded values for rounding-mode field).
The Precision Floating-Point Exception is signaled according to the immediate operand. If any source operand is an SNaN then it will be converted to a QNaN. If DAZ is set to \u20181 then denormals will be converted to zero before rounding.
128-bit Legacy SSE version: The second source can be an XMM register or 128-bit memory location. The destination is not distinct from the first source XMM register and the upper bits (MAXVL-1:128) of the corresponding YMM register destination are unmodified.
VEX.128 encoded version: the source operand second source operand or a 128-bit memory location. The destination operand is an XMM register. The upper bits (MAXVL-1:128) of the corresponding YMM register destination are zeroed.
", + "html": "Round the 2 double-precision floating-point values in the source operand (second operand) using the rounding mode specified in the immediate operand (third operand) and place the results in the destination operand (first operand). The rounding process rounds each input floating-point value to an integer value and returns the integer result as a double-precision floating-point value.
The immediate operand specifies control fields for the rounding operation, three bit fields are defined and shown in Figure 4-24. Bit 3 of the immediate byte controls processor behavior for a precision exception, bit 2 selects the source of rounding mode control. Bits 1:0 specify a non-sticky rounding-mode value (Table 4-23 lists the encoded values for rounding-mode field).
The Precision Floating-Point Exception is signaled according to the immediate operand. If any source operand is an SNaN then it will be converted to a QNaN. If DAZ is set to \u20181 then denormals will be converted to zero before rounding.
128-bit Legacy SSE version: The second source can be an XMM register or 128-bit memory location. The destination is not distinct from the first source XMM register and the upper bits (MAXVL-1:128) of the corresponding YMM register destination are unmodified.
VEX.128 encoded version: the source operand second source operand or a 128-bit memory location. The destination operand is an XMM register. The upper bits (MAXVL-1:128) of the corresponding YMM register destination are zeroed.
", "tooltip": "Round the 2 double-precision floating-point values in the source operand (second operand) using the rounding mode specified in the immediate operand (third operand) and place the results in the destination operand (first operand). The rounding process rounds each input floating-point value to an integer value and returns the integer result as a double-precision floating-point value.", "url": "http://www.felixcloutier.com/x86/ROUNDPD.html" }; @@ -3046,7 +2985,7 @@ export function getAsmOpcode(opcode) { case "ROUNDPS": case "VROUNDPS": return { - "html": "Round the 4 single-precision floating-point values in the source operand (second operand) using the rounding mode specified in the immediate operand (third operand) and place the results in the destination operand (first operand). The rounding process rounds each input floating-point value to an integer value and returns the integer result as a single-precision floating-point value.
The immediate operand specifies control fields for the rounding operation, three bit fields are defined and shown in Figure 4-24. Bit 3 of the immediate byte controls processor behavior for a precision exception, bit 2 selects the source of rounding mode control. Bits 1:0 specify a non-sticky rounding-mode value (Table 4-17 lists the encoded values for rounding-mode field).
The Precision Floating-Point Exception is signaled according to the immediate operand. If any source operand is an SNaN then it will be converted to a QNaN. If DAZ is set to \u20181 then denormals will be converted to zero before rounding.
128-bit Legacy SSE version: The second source can be an XMM register or 128-bit memory location. The destination is not distinct from the first source XMM register and the upper bits (MAXVL-1:128) of the corresponding YMM register destination are unmodified.
VEX.128 encoded version: the source operand second source operand or a 128-bit memory location. The destination operand is an XMM register. The upper bits (MAXVL-1:128) of the corresponding YMM register destination are zeroed.
", + "html": "Round the 4 single-precision floating-point values in the source operand (second operand) using the rounding mode specified in the immediate operand (third operand) and place the results in the destination operand (first operand). The rounding process rounds each input floating-point value to an integer value and returns the integer result as a single-precision floating-point value.
The immediate operand specifies control fields for the rounding operation, three bit fields are defined and shown in Figure 4-24. Bit 3 of the immediate byte controls processor behavior for a precision exception, bit 2 selects the source of rounding mode control. Bits 1:0 specify a non-sticky rounding-mode value (Table 4-23 lists the encoded values for rounding-mode field).
The Precision Floating-Point Exception is signaled according to the immediate operand. If any source operand is an SNaN then it will be converted to a QNaN. If DAZ is set to \u20181 then denormals will be converted to zero before rounding.
128-bit Legacy SSE version: The second source can be an XMM register or 128-bit memory location. The destination is not distinct from the first source XMM register and the upper bits (MAXVL-1:128) of the corresponding YMM register destination are unmodified.
VEX.128 encoded version: the source operand second source operand or a 128-bit memory location. The destination operand is an XMM register. The upper bits (MAXVL-1:128) of the corresponding YMM register destination are zeroed.
", "tooltip": "Round the 4 single-precision floating-point values in the source operand (second operand) using the rounding mode specified in the immediate operand (third operand) and place the results in the destination operand (first operand). The rounding process rounds each input floating-point value to an integer value and returns the integer result as a single-precision floating-point value.", "url": "http://www.felixcloutier.com/x86/ROUNDPS.html" }; @@ -3054,7 +2993,7 @@ export function getAsmOpcode(opcode) { case "ROUNDSD": case "VROUNDSD": return { - "html": "Round the DP FP value in the lower qword of the source operand (second operand) using the rounding mode specified in the immediate operand (third operand) and place the result in the destination operand (first operand). The rounding process rounds a double-precision floating-point input to an integer value and returns the integer result as a double precision floating-point value in the lowest position. The upper double precision floating-point value in the destination is retained.
The immediate operand specifies control fields for the rounding operation, three bit fields are defined and shown in Figure 4-24. Bit 3 of the immediate byte controls processor behavior for a precision exception, bit 2 selects the source of rounding mode control. Bits 1:0 specify a non-sticky rounding-mode value (Table 4-17 lists the encoded values for rounding-mode field).
The Precision Floating-Point Exception is signaled according to the immediate operand. If any source operand is an SNaN then it will be converted to a QNaN. If DAZ is set to \u20181 then denormals will be converted to zero before rounding.
128-bit Legacy SSE version: The first source operand and the destination operand are the same. Bits (MAXVL-1:64) of the corresponding YMM destination register remain unchanged.
VEX.128 encoded version: Bits (MAXVL-1:128) of the destination YMM register are zeroed.
", + "html": "Round the DP FP value in the lower qword of the source operand (second operand) using the rounding mode specified in the immediate operand (third operand) and place the result in the destination operand (first operand). The rounding process rounds a double-precision floating-point input to an integer value and returns the integer result as a double precision floating-point value in the lowest position. The upper double precision floating-point value in the destination is retained.
The immediate operand specifies control fields for the rounding operation, three bit fields are defined and shown in Figure 4-24. Bit 3 of the immediate byte controls processor behavior for a precision exception, bit 2 selects the source of rounding mode control. Bits 1:0 specify a non-sticky rounding-mode value (Table 4-23 lists the encoded values for rounding-mode field).
The Precision Floating-Point Exception is signaled according to the immediate operand. If any source operand is an SNaN then it will be converted to a QNaN. If DAZ is set to \u20181 then denormals will be converted to zero before rounding.
128-bit Legacy SSE version: The first source operand and the destination operand are the same. Bits (MAXVL-1:64) of the corresponding YMM destination register remain unchanged.
VEX.128 encoded version: Bits (MAXVL-1:128) of the destination YMM register are zeroed.
", "tooltip": "Round the DP FP value in the lower qword of the source operand (second operand) using the rounding mode specified in the immediate operand (third operand) and place the result in the destination operand (first operand). The rounding process rounds a double-precision floating-point input to an integer value and returns the integer result as a double precision floating-point value in the lowest position. The upper double precision floating-point value in the destination is retained.", "url": "http://www.felixcloutier.com/x86/ROUNDSD.html" }; @@ -3062,14 +3001,14 @@ export function getAsmOpcode(opcode) { case "ROUNDSS": case "VROUNDSS": return { - "html": "Round the single-precision floating-point value in the lowest dword of the source operand (second operand) using the rounding mode specified in the immediate operand (third operand) and place the result in the destination operand (first operand). The rounding process rounds a single-precision floating-point input to an integer value and returns the result as a single-precision floating-point value in the lowest position. The upper three single-precision floating-point values in the destination are retained.
The immediate operand specifies control fields for the rounding operation, three bit fields are defined and shown in Figure 4-24. Bit 3 of the immediate byte controls processor behavior for a precision exception, bit 2 selects the source of rounding mode control. Bits 1:0 specify a non-sticky rounding-mode value (Table 4-17 lists the encoded values for rounding-mode field).
The Precision Floating-Point Exception is signaled according to the immediate operand. If any source operand is an SNaN then it will be converted to a QNaN. If DAZ is set to \u20181 then denormals will be converted to zero before rounding.
128-bit Legacy SSE version: The first source operand and the destination operand are the same. Bits (MAXVL-1:32) of the corresponding YMM destination register remain unchanged.
VEX.128 encoded version: Bits (MAXVL-1:128) of the destination YMM register are zeroed.
", + "html": "Round the single-precision floating-point value in the lowest dword of the source operand (second operand) using the rounding mode specified in the immediate operand (third operand) and place the result in the destination operand (first operand). The rounding process rounds a single-precision floating-point input to an integer value and returns the result as a single-precision floating-point value in the lowest position. The upper three single-precision floating-point values in the destination are retained.
The immediate operand specifies control fields for the rounding operation, three bit fields are defined and shown in Figure 4-24. Bit 3 of the immediate byte controls processor behavior for a precision exception, bit 2 selects the source of rounding mode control. Bits 1:0 specify a non-sticky rounding-mode value (Table 4-23 lists the encoded values for rounding-mode field).
The Precision Floating-Point Exception is signaled according to the immediate operand. If any source operand is an SNaN then it will be converted to a QNaN. If DAZ is set to \u20181 then denormals will be converted to zero before rounding.
128-bit Legacy SSE version: The first source operand and the destination operand are the same. Bits (MAXVL-1:32) of the corresponding YMM destination register remain unchanged.
VEX.128 encoded version: Bits (MAXVL-1:128) of the destination YMM register are zeroed.
", "tooltip": "Round the single-precision floating-point value in the lowest dword of the source operand (second operand) using the rounding mode specified in the immediate operand (third operand) and place the result in the destination operand (first operand). The rounding process rounds a single-precision floating-point input to an integer value and returns the result as a single-precision floating-point value in the lowest position. The upper three single-precision floating-point values in the destination are retained.", "url": "http://www.felixcloutier.com/x86/ROUNDSS.html" }; case "RSM": return { - "html": "Returns program control from system management mode (SMM) to the application program or operating-system procedure that was interrupted when the processor received an SMM interrupt. The processor\u2019s state is restored from the dump created upon entering SMM. If the processor detects invalid state information during state restoration, it enters the shutdown state. The following invalid information can cause a shutdown:
The contents of the model-specific registers are not affected by a return from SMM.
The SMM state map used by RSM supports resuming processor context for non-64-bit modes and 64-bit mode.
See Chapter 34, \u201cSystem Management Mode,\u201d in the Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 3C, for more information about SMM and the behavior of the RSM instruction.
", + "html": "Returns program control from system management mode (SMM) to the application program or operating-system procedure that was interrupted when the processor received an SMM interrupt. The processor\u2019s state is restored from the dump created upon entering SMM. If the processor detects invalid state information during state restoration, it enters the shutdown state. The following invalid information can cause a shutdown:
The contents of the model-specific registers are not affected by a return from SMM.
The SMM state map used by RSM supports resuming processor context for non-64-bit modes and 64-bit mode.
See Chapter 31, \u201cSystem Management Mode,\u201d in the Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 3C, for more information about SMM and the behavior of the RSM instruction.
", "tooltip": "Returns program control from system management mode (SMM) to the application program or operating-system procedure that was interrupted when the processor received an SMM interrupt. The processor\u2019s state is restored from the dump created upon entering SMM. If the processor detects invalid state information during state restoration, it enters the shutdown state. The following invalid information can cause a shutdown", "url": "http://www.felixcloutier.com/x86/RSM.html" }; @@ -3134,6 +3073,20 @@ export function getAsmOpcode(opcode) { "url": "http://www.felixcloutier.com/x86/SCAS%3ASCASB%3ASCASW%3ASCASD.html" }; + case "SERIALIZE": + return { + "html": "Serializes instruction execution. Before the next instruction is fetched and executed, the SERIALIZE instruction ensures that all modifications to flags, registers, and memory by previous instructions are completed, draining all buffered writes to memory. This instruction is also a serializing instruction as defined in the section \u201cSerializing Instructions\u201d in Chapter 8 of the Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 3A.
SERIALIZE does not modify registers, arithmetic flags, or memory.
", + "tooltip": "Serializes instruction execution. Before the next instruction is fetched and executed, the SERIALIZE instruction ensures that all modifications to flags, registers, and memory by previous instructions are completed, draining all buffered writes to memory. This instruction is also a serializing instruction as defined in the section \u201cSerializing Instructions\u201d in Chapter 8 of the Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 3A.", + "url": "http://www.felixcloutier.com/x86/SERIALIZE.html" + }; + + case "SETSSBSY": + return { + "html": "The SETSSBSY instruction verifies the presence of a non-busy supervisor shadow stack token at the address in the IA32_PL0_SSP MSR and marks it busy. Following successful execution of the instruction, the SSP is set to the value of the IA32_PL0_SSP MSR.
", + "tooltip": "The SETSSBSY instruction verifies the presence of a non-busy supervisor shadow stack token at the address in the IA32_PL0_SSP MSR and marks it busy. Following successful execution of the instruction, the SSP is set to the value of the IA32_PL0_SSP MSR.", + "url": "http://www.felixcloutier.com/x86/SETSSBSY.html" + }; + case "SETA": case "SETAE": case "SETB": @@ -3411,7 +3364,7 @@ export function getAsmOpcode(opcode) { case "SYSCALL": return { - "html": "SYSCALL invokes an OS system-call handler at privilege level 0. It does so by loading RIP from the IA32_LSTAR MSR (after saving the address of the instruction following SYSCALL into RCX). (The WRMSR instruction ensures that the IA32_LSTAR MSR always contain a canonical address.)
SYSCALL also saves RFLAGS into R11 and then masks RFLAGS using the IA32_FMASK MSR (MSR address C0000084H); specifically, the processor clears in RFLAGS every bit corresponding to a bit that is set in the IA32_FMASK MSR.
SYSCALL loads the CS and SS selectors with values derived from bits 47:32 of the IA32_STAR MSR. However, the CS and SS descriptor caches are not loaded from the descriptors (in GDT or LDT) referenced by those selectors. Instead, the descriptor caches are loaded with fixed values. See the Operation section for details. It is the responsibility of OS software to ensure that the descriptors (in GDT or LDT) referenced by those selector values correspond to the fixed values loaded into the descriptor caches; the SYSCALL instruction does not ensure this correspondence.
The SYSCALL instruction does not save the stack pointer (RSP). If the OS system-call handler will change the stack pointer, it is the responsibility of software to save the previous value of the stack pointer. This might be done prior to executing SYSCALL, with software restoring the stack pointer with the instruction following SYSCALL (which will be executed after SYSRET). Alternatively, the OS system-call handler may save the stack pointer and restore it before executing SYSRET.
Instruction ordering. Instructions following a SYSCALL may be fetched from memory before earlier instructions complete execution, but they will not execute (even speculatively) until all instructions prior to the SYSCALL have completed execution (the later instructions may execute before data stored by the earlier instructions have become globally visible).
", + "html": "SYSCALL invokes an OS system-call handler at privilege level 0. It does so by loading RIP from the IA32_LSTAR MSR (after saving the address of the instruction following SYSCALL into RCX). (The WRMSR instruction ensures that the IA32_LSTAR MSR always contain a canonical address.)
SYSCALL also saves RFLAGS into R11 and then masks RFLAGS using the IA32_FMASK MSR (MSR address C0000084H); specifically, the processor clears in RFLAGS every bit corresponding to a bit that is set in the IA32_FMASK MSR.
SYSCALL loads the CS and SS selectors with values derived from bits 47:32 of the IA32_STAR MSR. However, the CS and SS descriptor caches are not loaded from the descriptors (in GDT or LDT) referenced by those selectors. Instead, the descriptor caches are loaded with fixed values. See the Operation section for details. It is the responsibility of OS software to ensure that the descriptors (in GDT or LDT) referenced by those selector values correspond to the fixed values loaded into the descriptor caches; the SYSCALL instruction does not ensure this correspondence.
The SYSCALL instruction does not save the stack pointer (RSP). If the OS system-call handler will change the stack pointer, it is the responsibility of software to save the previous value of the stack pointer. This might be done prior to executing SYSCALL, with software restoring the stack pointer with the instruction following SYSCALL (which will be executed after SYSRET). Alternatively, the OS system-call handler may save the stack pointer and restore it before executing SYSRET.
When shadow stacks are enabled at a privilege level where the SYSCALL instruction is invoked, the SSP is saved to the IA32_PL3_SSP MSR. If shadow stacks are enabled at privilege level 0, the SSP is loaded with 0. Refer to Chapter 6, \u201cProcedure Calls, Interrupts, and Exceptions\u201d and Chapter 18, \u201cControl-Flow Enforcement Technology (CET)\u201d in the Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 1 for additional CET details.
", "tooltip": "SYSCALL invokes an OS system-call handler at privilege level 0. It does so by loading RIP from the IA32_LSTAR MSR (after saving the address of the instruction following SYSCALL into RCX). (The WRMSR instruction ensures that the IA32_LSTAR MSR always contain a canonical address.)", "url": "http://www.felixcloutier.com/x86/SYSCALL.html" }; @@ -3432,7 +3385,7 @@ export function getAsmOpcode(opcode) { case "SYSRET": return { - "html": "SYSRET is a companion instruction to the SYSCALL instruction. It returns from an OS system-call handler to user code at privilege level 3. It does so by loading RIP from RCX and loading RFLAGS from R11.1 With a 64-bit operand size, SYSRET remains in 64-bit mode; otherwise, it enters compatibility mode and only the low 32 bits of the registers are loaded.
SYSRET loads the CS and SS selectors with values derived from bits 63:48 of the IA32_STAR MSR. However, the CS and SS descriptor caches are not loaded from the descriptors (in GDT or LDT) referenced by those selectors. Instead, the descriptor caches are loaded with fixed values. See the Operation section for details. It is the responsibility of OS software to ensure that the descriptors (in GDT or LDT) referenced by those selector values correspond to the fixed values loaded into the descriptor caches; the SYSRET instruction does not ensure this correspondence.
The SYSRET instruction does not modify the stack pointer (ESP or RSP). For that reason, it is necessary for software to switch to the user stack. The OS may load the user stack pointer (if it was saved after SYSCALL) before executing SYSRET; alternatively, user code may load the stack pointer (if it was saved before SYSCALL) after receiving control from SYSRET.
If the OS loads the stack pointer before executing SYSRET, it must ensure that the handler of any interrupt or exception delivered between restoring the stack pointer and successful execution of SYSRET is not invoked with the user stack. It can do so using approaches such as the following:
Instruction ordering. Instructions following a SYSRET may be fetched from memory before earlier instructions complete execution, but they will not execute (even speculatively) until all instructions prior to the SYSRET have completed execution (the later instructions may execute before data stored by the earlier instructions have become globally visible).
", + "html": "SYSRET is a companion instruction to the SYSCALL instruction. It returns from an OS system-call handler to user code at privilege level 3. It does so by loading RIP from RCX and loading RFLAGS from R11.1 With a 64-bit operand size, SYSRET remains in 64-bit mode; otherwise, it enters compatibility mode and only the low 32 bits of the registers are loaded.
SYSRET loads the CS and SS selectors with values derived from bits 63:48 of the IA32_STAR MSR. However, the CS and SS descriptor caches are not loaded from the descriptors (in GDT or LDT) referenced by those selectors. Instead, the descriptor caches are loaded with fixed values. See the Operation section for details. It is the responsibility of OS software to ensure that the descriptors (in GDT or LDT) referenced by those selector values correspond to the fixed values loaded into the descriptor caches; the SYSRET instruction does not ensure this correspondence.
The SYSRET instruction does not modify the stack pointer (ESP or RSP). For that reason, it is necessary for software to switch to the user stack. The OS may load the user stack pointer (if it was saved after SYSCALL) before executing SYSRET; alternatively, user code may load the stack pointer (if it was saved before SYSCALL) after receiving control from SYSRET.
If the OS loads the stack pointer before executing SYSRET, it must ensure that the handler of any interrupt or exception delivered between restoring the stack pointer and successful execution of SYSRET is not invoked with the user stack. It can do so using approaches such as the following:
When shadow stacks are enabled at privilege level 3 the instruction loads SSP with value from IA32_PL3_SSP MSR. Refer to Chapter 6, \u201cProcedure Calls, Interrupts, and Exceptions\u201d and Chapter 18, \u201cControl-Flow Enforcement Technology (CET)\u201d in the Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 1 for additional CET details.
", "tooltip": "SYSRET is a companion instruction to the SYSCALL instruction. It returns from an OS system-call handler to user code at privilege level 3. It does so by loading RIP from RCX and loading RFLAGS from R11.1 With a 64-bit operand size, SYSRET remains in 64-bit mode; otherwise, it enters compatibility mode and only the low 32 bits of the registers are loaded.", "url": "http://www.felixcloutier.com/x86/SYSRET.html" }; @@ -3469,7 +3422,7 @@ export function getAsmOpcode(opcode) { case "UCOMISS": case "VUCOMISS": return { - "html": "Compares the single-precision floating-point values in the low doublewords of operand 1 (first operand) and operand 2 (second operand), and sets the ZF, PF, and CF flags in the EFLAGS register according to the result (unordered, greater than, less than, or equal). The OF, SF and AF flags in the EFLAGS register are set to 0. The unordered result is returned if either source operand is a NaN (QNaN or SNaN).
Operand 1 is an XMM register; operand 2 can be an XMM register or a 32 bit memory location.
The UCOMISS instruction differs from the COMISS instruction in that it signals a SIMD floating-point invalid operation exception (#I) only if a source operand is an SNaN. The COMISS instruction signals an invalid numeric exception when a source operand is either a QNaN or SNaN.
The EFLAGS register is not updated if an unmasked SIMD floating-point exception is generated.
Note: VEX.vvvv and EVEX.vvvv are reserved and must be 1111b, otherwise instructions will #UD.
", + "html": "Compares the single-precision floating-point values in the low doublewords of operand 1 (first operand) and operand 2 (second operand), and sets the ZF, PF, and CF flags in the EFLAGS register according to the result (unordered, greater than, less than, or equal). The OF, SF and AF flags in the EFLAGS register are set to 0. The unordered result is returned if either source operand is a NaN (QNaN or SNaN).
Operand 1 is an XMM register; operand 2 can be an XMM register or a 32 bit memory location.
The UCOMISS instruction differs from the COMISS instruction in that it signals a SIMD floating-point invalid operation exception (#I) only if a source operand is an SNaN. The COMISS instruction signals an invalid operation exception when a source operand is either a QNaN or SNaN.
The EFLAGS register is not updated if an unmasked SIMD floating-point exception is generated.
Note: VEX.vvvv and EVEX.vvvv are reserved and must be 1111b, otherwise instructions will #UD.
", "tooltip": "Compares the single-precision floating-point values in the low doublewords of operand 1 (first operand) and operand 2 (second operand), and sets the ZF, PF, and CF flags in the EFLAGS register according to the result (unordered, greater than, less than, or equal). The OF, SF and AF flags in the EFLAGS register are set to 0. The unordered result is returned if either source operand is a NaN (QNaN or SNaN).", "url": "http://www.felixcloutier.com/x86/UCOMISS.html" }; @@ -3589,9 +3542,23 @@ export function getAsmOpcode(opcode) { "url": "http://www.felixcloutier.com/x86/VCOMPRESSPS.html" }; + case "VCVTNE2PS2BF16": + return { + "html": "Converts two SIMD registers of packed single data into a single register of packed BF16 data.
This instruction does not support memory fault suppression.
This instruction uses \u201cRound to nearest (even)\u201d rounding mode. Output denormals are always flushed to zero and input denormals are always treated as zero. MXCSR is not consulted nor updated. No floating-point exceptions are generated.
", + "tooltip": "Converts two SIMD registers of packed single data into a single register of packed BF16 data.", + "url": "http://www.felixcloutier.com/x86/VCVTNE2PS2BF16.html" + }; + + case "VCVTNEPS2BF16": + return { + "html": "Converts one SIMD register of packed single data into a single register of packed BF16 data.
This instruction uses \u201cRound to nearest (even)\u201d rounding mode. Output denormals are always flushed to zero and input denormals are always treated as zero. MXCSR is not consulted nor updated.
As the instruction operand encoding table shows, the EVEX.vvvv field is not used for encoding an operand. EVEX.vvvv is reserved and must be 0b1111 otherwise instructions will #UD.
", + "tooltip": "Converts one SIMD register of packed single data into a single register of packed BF16 data.", + "url": "http://www.felixcloutier.com/x86/VCVTNEPS2BF16.html" + }; + case "VCVTPD2QQ": return { - "html": "Converts packed double-precision floating-point values in the source operand (second operand) to packed quadword integers in the destination operand (first operand).
EVEX encoded versions: The source operand is a ZMM/YMM/XMM register or a 512/256/128-bit memory location. The destination operation is a ZMM/YMM/XMM register conditionally updated with writemask k1.
When a conversion is inexact, the value returned is rounded according to the rounding control bits in the MXCSR register or the embedded rounding control bits. If a converted result cannot be represented in the destination format, the floating-point invalid exception is raised, and if this exception is masked, the indefinite integer value (2w-1, where w represents the number of bits in the destination format) is returned.
EVEX.vvvv is reserved and must be 1111b otherwise instructions will #UD.
", + "html": "Converts packed double-precision floating-point values in the source operand (second operand) to packed quadword integers in the destination operand (first operand).
EVEX encoded versions: The source operand is a ZMM/YMM/XMM register or a 512/256/128-bit memory location. The destination operation is a ZMM/YMM/XMM register conditionally updated with writemask k1.
When a conversion is inexact, the value returned is rounded according to the rounding control bits in the MXCSR register or the embedded rounding control bits. If a converted result cannot be represented in the destination format, the floating-point invalid exception is raised, and if this exception is masked, the indefinite integer value (2w-1, where w represents the number of bits in the destination format) is returned.
EVEX.vvvv is reserved and must be 1111b otherwise instructions will #UD.
", "tooltip": "Converts packed double-precision floating-point values in the source operand (second operand) to packed quadword integers in the destination operand (first operand).", "url": "http://www.felixcloutier.com/x86/VCVTPD2QQ.html" }; @@ -3626,7 +3593,7 @@ export function getAsmOpcode(opcode) { case "VCVTPS2QQ": return { - "html": "Converts eight packed single-precision floating-point values in the source operand to eight signed quadword integers in the destination operand.
When a conversion is inexact, the value returned is rounded according to the rounding control bits in the MXCSR register or the embedded rounding control bits. If a converted result cannot be represented in the destination format, the floating-point invalid exception is raised, and if this exception is masked, the indefinite integer value (2w-1, where w represents the number of bits in the destination format) is returned.
The source operand is a YMM/XMM/XMM (low 64- bits) register or a 256/128/64-bit memory location. The destination operation is a ZMM/YMM/XMM register conditionally updated with writemask k1.
Note: EVEX.vvvv is reserved and must be 1111b otherwise instructions will #UD.
", + "html": "Converts eight packed single-precision floating-point values in the source operand to eight signed quadword integers in the destination operand.
When a conversion is inexact, the value returned is rounded according to the rounding control bits in the MXCSR register or the embedded rounding control bits. If a converted result cannot be represented in the destination format, the floating-point invalid exception is raised, and if this exception is masked, the indefinite integer value (2w-1, where w represents the number of bits in the destination format) is returned.
The source operand is a YMM/XMM/XMM (low 64- bits) register or a 256/128/64-bit memory location. The destination operation is a ZMM/YMM/XMM register conditionally updated with writemask k1.
Note: EVEX.vvvv is reserved and must be 1111b otherwise instructions will #UD.
", "tooltip": "Converts eight packed single-precision floating-point values in the source operand to eight signed quadword integers in the destination operand.", "url": "http://www.felixcloutier.com/x86/VCVTPS2QQ.html" }; @@ -3675,56 +3642,56 @@ export function getAsmOpcode(opcode) { case "VCVTTPD2QQ": return { - "html": "Converts with truncation packed double-precision floating-point values in the source operand (second operand) to packed quadword integers in the destination operand (first operand).
EVEX encoded versions: The source operand is a ZMM/YMM/XMM register or a 512/256/128-bit memory location. The destination operand is a ZMM/YMM/XMM register conditionally updated with writemask k1.
When a conversion is inexact, the value returned is rounded according to the rounding control bits in the MXCSR register. If a converted result cannot be represented in the destination format, the floating-point invalid exception is raised, and if this exception is masked, the indefinite integer value (2w-1, where w represents the number of bits in the destination format) is returned.
Note: EVEX.vvvv is reserved and must be 1111b, otherwise instructions will #UD.
", + "html": "Converts with truncation packed double-precision floating-point values in the source operand (second operand) to packed quadword integers in the destination operand (first operand).
EVEX encoded versions: The source operand is a ZMM/YMM/XMM register or a 512/256/128-bit memory location. The destination operand is a ZMM/YMM/XMM register conditionally updated with writemask k1.
When a conversion is inexact, a truncated (round toward zero) value is returned. If a converted result cannot be represented in the destination format, the floating-point invalid exception is raised, and if this exception is masked, the indefinite integer value (2w-1, where w represents the number of bits in the destination format) is returned.
Note: EVEX.vvvv is reserved and must be 1111b, otherwise instructions will #UD.
", "tooltip": "Converts with truncation packed double-precision floating-point values in the source operand (second operand) to packed quadword integers in the destination operand (first operand).", "url": "http://www.felixcloutier.com/x86/VCVTTPD2QQ.html" }; case "VCVTTPD2UDQ": return { - "html": "Converts with truncation packed double-precision floating-point values in the source operand (the second operand) to packed unsigned doubleword integers in the destination operand (the first operand).
When a conversion is inexact, the value returned is rounded according to the rounding control bits in the MXCSR register. If a converted result cannot be represented in the destination format, the floating-point invalid exception is raised, and if this exception is masked, the integer value 2w \u2013 1 is returned, where w represents the number of bits in the destination format.
The source operand is a ZMM/YMM/XMM register, a 512/256/128-bit memory location, or a 512/256/128-bit vector broadcasted from a 64-bit memory location. The destination operand is a YMM/XMM/XMM (low 64 bits) register conditionally updated with writemask k1. The upper bits (MAXVL-1:256) of the corresponding destination are zeroed.
Note: EVEX.vvvv is reserved and must be 1111b, otherwise instructions will #UD.
", + "html": "Converts with truncation packed double-precision floating-point values in the source operand (the second operand) to packed unsigned doubleword integers in the destination operand (the first operand).
When a conversion is inexact, a truncated (round toward zero) value is returned. If a converted result cannot be represented in the destination format, the floating-point invalid exception is raised, and if this exception is masked, the integer value 2w \u2013 1 is returned, where w represents the number of bits in the destination format.
The source operand is a ZMM/YMM/XMM register, a 512/256/128-bit memory location, or a 512/256/128-bit vector broadcasted from a 64-bit memory location. The destination operand is a YMM/XMM/XMM (low 64 bits) register conditionally updated with writemask k1. The upper bits (MAXVL-1:256) of the corresponding destination are zeroed.
Note: EVEX.vvvv is reserved and must be 1111b, otherwise instructions will #UD.
", "tooltip": "Converts with truncation packed double-precision floating-point values in the source operand (the second operand) to packed unsigned doubleword integers in the destination operand (the first operand).", "url": "http://www.felixcloutier.com/x86/VCVTTPD2UDQ.html" }; case "VCVTTPD2UQQ": return { - "html": "Converts with truncation packed double-precision floating-point values in the source operand (second operand) to packed unsigned quadword integers in the destination operand (first operand).
When a conversion is inexact, the value returned is rounded according to the rounding control bits in the MXCSR register. If a converted result cannot be represented in the destination format, the floating-point invalid exception is raised, and if this exception is masked, the integer value 2w \u2013 1 is returned, where w represents the number of bits in the destination format.
EVEX encoded versions: The source operand is a ZMM/YMM/XMM register or a 512/256/128-bit memory location. The destination operation is a ZMM/YMM/XMM register conditionally updated with writemask k1.
Note: EVEX.vvvv is reserved and must be 1111b, otherwise instructions will #UD.
", + "html": "Converts with truncation packed double-precision floating-point values in the source operand (second operand) to packed unsigned quadword integers in the destination operand (first operand).
When a conversion is inexact, a truncated (round toward zero) value is returned. If a converted result cannot be represented in the destination format, the floating-point invalid exception is raised, and if this exception is masked, the integer value 2w \u2013 1 is returned, where w represents the number of bits in the destination format.
EVEX encoded versions: The source operand is a ZMM/YMM/XMM register or a 512/256/128-bit memory location. The destination operation is a ZMM/YMM/XMM register conditionally updated with writemask k1.
Note: EVEX.vvvv is reserved and must be 1111b, otherwise instructions will #UD.
", "tooltip": "Converts with truncation packed double-precision floating-point values in the source operand (second operand) to packed unsigned quadword integers in the destination operand (first operand).", "url": "http://www.felixcloutier.com/x86/VCVTTPD2UQQ.html" }; case "VCVTTPS2QQ": return { - "html": "Converts with truncation packed single-precision floating-point values in the source operand to eight signed quadword integers in the destination operand.
When a conversion is inexact, the value returned is rounded according to the rounding control bits in the MXCSR register. If a converted result cannot be represented in the destination format, the floating-point invalid exception is raised, and if this exception is masked, the indefinite integer value (2w-1, where w represents the number of bits in the destination format) is returned.
EVEX encoded versions: The source operand is a YMM/XMM/XMM (low 64 bits) register or a 256/128/64-bit memory location. The destination operation is a vector register conditionally updated with writemask k1.
Note: EVEX.vvvv is reserved and must be 1111b otherwise instructions will #UD.
", + "html": "Converts with truncation packed single-precision floating-point values in the source operand to eight signed quadword integers in the destination operand.
When a conversion is inexact, a truncated (round toward zero) value is returned. If a converted result cannot be represented in the destination format, the floating-point invalid exception is raised, and if this exception is masked, the indefinite integer value (2w-1, where w represents the number of bits in the destination format) is returned.
EVEX encoded versions: The source operand is a YMM/XMM/XMM (low 64 bits) register or a 256/128/64-bit memory location. The destination operation is a vector register conditionally updated with writemask k1.
Note: EVEX.vvvv is reserved and must be 1111b otherwise instructions will #UD.
", "tooltip": "Converts with truncation packed single-precision floating-point values in the source operand to eight signed quadword integers in the destination operand.", "url": "http://www.felixcloutier.com/x86/VCVTTPS2QQ.html" }; case "VCVTTPS2UDQ": return { - "html": "Converts with truncation packed single-precision floating-point values in the source operand to sixteen unsigned doubleword integers in the destination operand.
When a conversion is inexact, the value returned is rounded according to the rounding control bits in the MXCSR. If a converted result cannot be represented in the destination format, the floating-point invalid exception is raised, and if this exception is masked, the integer value 2w \u2013 1 is returned, where w represents the number of bits in the destination format.
EVEX encoded versions: The source operand is a ZMM/YMM/XMM register, a 512/256/128-bit memory location or a 512/256/128-bit vector broadcasted from a 32-bit memory location. The destination operand is a ZMM/YMM/XMM register conditionally updated with writemask k1.
Note: EVEX.vvvv is reserved and must be 1111b otherwise instructions will #UD.
", + "html": "Converts with truncation packed single-precision floating-point values in the source operand to sixteen unsigned doubleword integers in the destination operand.
When a conversion is inexact, a truncated (round toward zero) value is returned. If a converted result cannot be represented in the destination format, the floating-point invalid exception is raised, and if this exception is masked, the integer value 2w \u2013 1 is returned, where w represents the number of bits in the destination format.
EVEX encoded versions: The source operand is a ZMM/YMM/XMM register, a 512/256/128-bit memory location or a 512/256/128-bit vector broadcasted from a 32-bit memory location. The destination operand is a ZMM/YMM/XMM register conditionally updated with writemask k1.
Note: EVEX.vvvv is reserved and must be 1111b otherwise instructions will #UD.
", "tooltip": "Converts with truncation packed single-precision floating-point values in the source operand to sixteen unsigned doubleword integers in the destination operand.", "url": "http://www.felixcloutier.com/x86/VCVTTPS2UDQ.html" }; case "VCVTTPS2UQQ": return { - "html": "Converts with truncation up to eight packed single-precision floating-point values in the source operand to unsigned quadword integers in the destination operand.
When a conversion is inexact, the value returned is rounded according to the rounding control bits in the MXCSR register. If a converted result cannot be represented in the destination format, the floating-point invalid exception is raised, and if this exception is masked, the integer value 2w \u2013 1 is returned, where w represents the number of bits in the destination format.
EVEX encoded versions: The source operand is a YMM/XMM/XMM (low 64 bits) register or a 256/128/64-bit memory location. The destination operation is a vector register conditionally updated with writemask k1.
Note: EVEX.vvvv is reserved and must be 1111b otherwise instructions will #UD.
", + "html": "Converts with truncation up to eight packed single-precision floating-point values in the source operand to unsigned quadword integers in the destination operand.
When a conversion is inexact, a truncated (round toward zero) value is returned. If a converted result cannot be represented in the destination format, the floating-point invalid exception is raised, and if this exception is masked, the integer value 2w \u2013 1 is returned, where w represents the number of bits in the destination format.
EVEX encoded versions: The source operand is a YMM/XMM/XMM (low 64 bits) register or a 256/128/64-bit memory location. The destination operation is a vector register conditionally updated with writemask k1.
Note: EVEX.vvvv is reserved and must be 1111b otherwise instructions will #UD.
", "tooltip": "Converts with truncation up to eight packed single-precision floating-point values in the source operand to unsigned quadword integers in the destination operand.", "url": "http://www.felixcloutier.com/x86/VCVTTPS2UQQ.html" }; case "VCVTTSD2USI": return { - "html": "Converts with truncation a double-precision floating-point value in the source operand (the second operand) to an unsigned doubleword integer (or unsigned quadword integer if operand size is 64 bits) in the destination operand (the first operand). The source operand can be an XMM register or a 64-bit memory location. The destination operand is a general-purpose register. When the source operand is an XMM register, the double-precision floating-point value is contained in the low quadword of the register.
When a conversion is inexact, the value returned is rounded according to the rounding control bits in the MXCSR register. If a converted result cannot be represented in the destination format, the floating-point invalid exception is raised, and if this exception is masked, the integer value 2w \u2013 1 is returned, where w represents the number of bits in the destination format.
EVEX.W1 version: promotes the instruction to produce 64-bit data in 64-bit mode.
", + "html": "Converts with truncation a double-precision floating-point value in the source operand (the second operand) to an unsigned doubleword integer (or unsigned quadword integer if operand size is 64 bits) in the destination operand (the first operand). The source operand can be an XMM register or a 64-bit memory location. The destination operand is a general-purpose register. When the source operand is an XMM register, the double-precision floating-point value is contained in the low quadword of the register.
When a conversion is inexact, a truncated (round toward zero) value is returned. If a converted result cannot be represented in the destination format, the floating-point invalid exception is raised, and if this exception is masked, the integer value 2w \u2013 1 is returned, where w represents the number of bits in the destination format.
EVEX.W1 version: promotes the instruction to produce 64-bit data in 64-bit mode.
", "tooltip": "Converts with truncation a double-precision floating-point value in the source operand (the second operand) to an unsigned doubleword integer (or unsigned quadword integer if operand size is 64 bits) in the destination operand (the first operand). The source operand can be an XMM register or a 64-bit memory location. The destination operand is a general-purpose register. When the source operand is an XMM register, the double-precision floating-point value is contained in the low quadword of the register.", "url": "http://www.felixcloutier.com/x86/VCVTTSD2USI.html" }; case "VCVTTSS2USI": return { - "html": "Converts with truncation a single-precision floating-point value in the source operand (the second operand) to an unsigned doubleword integer (or unsigned quadword integer if operand size is 64 bits) in the destination operand (the first operand). The source operand can be an XMM register or a memory location. The destination operand is a general-purpose register. When the source operand is an XMM register, the single-precision floating-point value is contained in the low doubleword of the register.
When a conversion is inexact, the value returned is rounded according to the rounding control bits in the MXCSR register. If a converted result cannot be represented in the destination format, the floating-point invalid exception is raised, and if this exception is masked, the integer value 2w \u2013 1 is returned, where w represents the number of bits in the destination format.
EVEX.W1 version: promotes the instruction to produce 64-bit data in 64-bit mode.
Note: EVEX.vvvv is reserved and must be 1111b, otherwise instructions will #UD.
", + "html": "Converts with truncation a single-precision floating-point value in the source operand (the second operand) to an unsigned doubleword integer (or unsigned quadword integer if operand size is 64 bits) in the destination operand (the first operand). The source operand can be an XMM register or a memory location. The destination operand is a general-purpose register. When the source operand is an XMM register, the single-precision floating-point value is contained in the low doubleword of the register.
When a conversion is inexact, a truncated (round toward zero) value is returned. If a converted result cannot be represented in the destination format, the floating-point invalid exception is raised, and if this exception is masked, the integer value 2w \u2013 1 is returned, where w represents the number of bits in the destination format.
EVEX.W1 version: promotes the instruction to produce 64-bit data in 64-bit mode.
Note: EVEX.vvvv is reserved and must be 1111b, otherwise instructions will #UD.
", "tooltip": "Converts with truncation a single-precision floating-point value in the source operand (the second operand) to an unsigned doubleword integer (or unsigned quadword integer if operand size is 64 bits) in the destination operand (the first operand). The source operand can be an XMM register or a memory location. The destination operand is a general-purpose register. When the source operand is an XMM register, the single-precision floating-point value is contained in the low doubleword of the register.", "url": "http://www.felixcloutier.com/x86/VCVTTSS2USI.html" }; @@ -3778,6 +3745,13 @@ export function getAsmOpcode(opcode) { "url": "http://www.felixcloutier.com/x86/VDBPSADBW.html" }; + case "VDPBF16PS": + return { + "html": "This instruction performs a SIMD dot-product of two BF16 pairs and accumulates into a packed single precision register.
\u201cRound to nearest even\u201d rounding mode is used when doing each accumulation of the FMA. Output denormals are always flushed to zero and input denormals are always treated as zero. MXCSR is not consulted nor updated.
NaN propagation priorities are described in Table 5-1.
", + "tooltip": "This instruction performs a SIMD dot-product of two BF16 pairs and accumulates into a packed single precision register.", + "url": "http://www.felixcloutier.com/x86/VDPBF16PS.html" + }; + case "VERR": case "VERW": return { @@ -3836,28 +3810,28 @@ export function getAsmOpcode(opcode) { case "VFIXUPIMMPD": return { - "html": "Perform fix-up of quad-word elements encoded in double-precision floating-point format in the first source operand (the second operand) using a 32-bit, two-level look-up table specified in the corresponding quadword element of the second source operand (the third operand) with exception reporting specifier imm8. The elements that are fixed-up are selected by mask bits of 1 specified in the opmask k1. Mask bits of 0 in the opmask k1 or table response action of 0000b preserves the corresponding element of the first operand. The fixed-up elements from the first source operand and the preserved element in the first operand are combined as the final results in the destination operand (the first operand).
The destination and the first source operands are ZMM/YMM/XMM registers. The second source operand can be a ZMM/YMM/XMM register, a 512/256/128-bit memory location or a 512/256/128-bit vector broadcasted from a 64-bit memory location.
The two-level look-up table perform a fix-up of each DP FP input data in the first source operand by decoding the input data encoding into 8 token types. A response table is defined for each token type that converts the input encoding in the first source operand with one of 16 response actions.
This instruction is specifically intended for use in fixing up the results of arithmetic calculations involving one source so that they match the spec, although it is generally useful for fixing up the results of multiple-instruction sequences to reflect special-number inputs. For example, consider rcp(0). Input 0 to rcp, and you should get INF according to the DX10 spec. However, evaluating rcp via Newton-Raphson, where x=approx(1/0), yields an incorrect result. To deal with this, VFIXUPIMMPD can be used after the N-R reciprocal sequence to set the result to the correct value (i.e. INF when the input is 0).
If MXCSR.DAZ is not set, denormal input elements in the first source operand are considered as normal inputs and do not trigger any fixup nor fault reporting.
", + "html": "Perform fix-up of quad-word elements encoded in double-precision floating-point format in the first source operand (the second operand) using a 32-bit, two-level look-up table specified in the corresponding quadword element of the second source operand (the third operand) with exception reporting specifier imm8. The elements that are fixed-up are selected by mask bits of 1 specified in the opmask k1. Mask bits of 0 in the opmask k1 or table response action of 0000b preserves the corresponding element of the first operand. The fixed-up elements from the first source operand and the preserved element in the first operand are combined as the final results in the destination operand (the first operand).
The destination and the first source operands are ZMM/YMM/XMM registers. The second source operand can be a ZMM/YMM/XMM register, a 512/256/128-bit memory location or a 512/256/128-bit vector broadcasted from a 64-bit memory location.
The two-level look-up table perform a fix-up of each DP FP input data in the first source operand by decoding the input data encoding into 8 token types. A response table is defined for each token type that converts the input encoding in the first source operand with one of 16 response actions.
This instruction is specifically intended for use in fixing up the results of arithmetic calculations involving one source so that they match the spec, although it is generally useful for fixing up the results of multiple-instruction sequences to reflect special-number inputs. For example, consider rcp(0). Input 0 to rcp, and you should get INF according to the DX10 spec. However, evaluating rcp via Newton-Raphson, where x=approx(1/0), yields an incorrect result. To deal with this, VFIXUPIMMPD can be used after the N-R reciprocal sequence to set the result to the correct value (i.e., INF when the input is 0).
If MXCSR.DAZ is not set, denormal input elements in the first source operand are considered as normal inputs and do not trigger any fixup nor fault reporting.
", "tooltip": "Perform fix-up of quad-word elements encoded in double-precision floating-point format in the first source operand (the second operand) using a 32-bit, two-level look-up table specified in the corresponding quadword element of the second source operand (the third operand) with exception reporting specifier imm8. The elements that are fixed-up are selected by mask bits of 1 specified in the opmask k1. Mask bits of 0 in the opmask k1 or table response action of 0000b preserves the corresponding element of the first operand. The fixed-up elements from the first source operand and the preserved element in the first operand are combined as the final results in the destination operand (the first operand).", "url": "http://www.felixcloutier.com/x86/VFIXUPIMMPD.html" }; case "VFIXUPIMMPS": return { - "html": "Perform fix-up of doubleword elements encoded in single-precision floating-point format in the first source operand (the second operand) using a 32-bit, two-level look-up table specified in the corresponding doubleword element of the second source operand (the third operand) with exception reporting specifier imm8. The elements that are fixed-up are selected by mask bits of 1 specified in the opmask k1. Mask bits of 0 in the opmask k1 or table response action of 0000b preserves the corresponding element of the first operand. The fixed-up elements from the first source operand and the preserved element in the first operand are combined as the final results in the destination operand (the first operand).
The destination and the first source operands are ZMM/YMM/XMM registers. The second source operand can be a ZMM/YMM/XMM register, a 512/256/128-bit memory location or a 512/256/128-bit vector broadcasted from a 64-bit memory location.
The two-level look-up table perform a fix-up of each SP FP input data in the first source operand by decoding the input data encoding into 8 token types. A response table is defined for each token type that converts the input encoding in the first source operand with one of 16 response actions.
This instruction is specifically intended for use in fixing up the results of arithmetic calculations involving one source so that they match the spec, although it is generally useful for fixing up the results of multiple-instruction sequences to reflect special-number inputs. For example, consider rcp(0). Input 0 to rcp, and you should get INF according to the DX10 spec. However, evaluating rcp via Newton-Raphson, where x=approx(1/0), yields an incorrect result. To deal with this, VFIXUPIMMPS can be used after the N-R reciprocal sequence to set the result to the correct value (i.e. INF when the input is 0).
If MXCSR.DAZ is not set, denormal input elements in the first source operand are considered as normal inputs and do not trigger any fixup nor fault reporting.
", + "html": "Perform fix-up of doubleword elements encoded in single-precision floating-point format in the first source operand (the second operand) using a 32-bit, two-level look-up table specified in the corresponding doubleword element of the second source operand (the third operand) with exception reporting specifier imm8. The elements that are fixed-up are selected by mask bits of 1 specified in the opmask k1. Mask bits of 0 in the opmask k1 or table response action of 0000b preserves the corresponding element of the first operand. The fixed-up elements from the first source operand and the preserved element in the first operand are combined as the final results in the destination operand (the first operand).
The destination and the first source operands are ZMM/YMM/XMM registers. The second source operand can be a ZMM/YMM/XMM register, a 512/256/128-bit memory location or a 512/256/128-bit vector broadcasted from a 64-bit memory location.
The two-level look-up table perform a fix-up of each SP FP input data in the first source operand by decoding the input data encoding into 8 token types. A response table is defined for each token type that converts the input encoding in the first source operand with one of 16 response actions.
This instruction is specifically intended for use in fixing up the results of arithmetic calculations involving one source so that they match the spec, although it is generally useful for fixing up the results of multiple-instruction sequences to reflect special-number inputs. For example, consider rcp(0). Input 0 to rcp, and you should get INF according to the DX10 spec. However, evaluating rcp via Newton-Raphson, where x=approx(1/0), yields an incorrect result. To deal with this, VFIXUPIMMPS can be used after the N-R reciprocal sequence to set the result to the correct value (i.e., INF when the input is 0).
If MXCSR.DAZ is not set, denormal input elements in the first source operand are considered as normal inputs and do not trigger any fixup nor fault reporting.
", "tooltip": "Perform fix-up of doubleword elements encoded in single-precision floating-point format in the first source operand (the second operand) using a 32-bit, two-level look-up table specified in the corresponding doubleword element of the second source operand (the third operand) with exception reporting specifier imm8. The elements that are fixed-up are selected by mask bits of 1 specified in the opmask k1. Mask bits of 0 in the opmask k1 or table response action of 0000b preserves the corresponding element of the first operand. The fixed-up elements from the first source operand and the preserved element in the first operand are combined as the final results in the destination operand (the first operand).", "url": "http://www.felixcloutier.com/x86/VFIXUPIMMPS.html" }; case "VFIXUPIMMSD": return { - "html": "Perform a fix-up of the low quadword element encoded in double-precision floating-point format in the first source operand (the second operand) using a 32-bit, two-level look-up table specified in the low quadword element of the second source operand (the third operand) with exception reporting specifier imm8. The element that is fixed-up is selected by mask bit of 1 specified in the opmask k1. Mask bit of 0 in the opmask k1 or table response action of 0000b preserves the corresponding element of the first operand. The fixed-up element from the first source operand or the preserved element in the first operand becomes the low quadword element of the destination operand (the first operand). Bits 127:64 of the destination operand is copied from the corresponding bits of the first source operand. The destination and first source operands are XMM registers. The second source operand can be a XMM register or a 64- bit memory location.
The two-level look-up table perform a fix-up of each DP FP input data in the first source operand by decoding the input data encoding into 8 token types. A response table is defined for each token type that converts the input encoding in the first source operand with one of 16 response actions.
This instruction is specifically intended for use in fixing up the results of arithmetic calculations involving one source so that they match the spec, although it is generally useful for fixing up the results of multiple-instruction sequences to reflect special-number inputs. For example, consider rcp(0). Input 0 to rcp, and you should get INF according to the DX10 spec. However, evaluating rcp via Newton-Raphson, where x=approx(1/0), yields an incorrect result. To deal with this, VFIXUPIMMPD can be used after the N-R reciprocal sequence to set the result to the correct value (i.e. INF when the input is 0).
If MXCSR.DAZ is not set, denormal input elements in the first source operand are considered as normal inputs and do not trigger any fixup nor fault reporting.
Imm8 is used to set the required flags reporting. It supports #ZE and #IE fault reporting (see details below).
", + "html": "Perform a fix-up of the low quadword element encoded in double-precision floating-point format in the first source operand (the second operand) using a 32-bit, two-level look-up table specified in the low quadword element of the second source operand (the third operand) with exception reporting specifier imm8. The element that is fixed-up is selected by mask bit of 1 specified in the opmask k1. Mask bit of 0 in the opmask k1 or table response action of 0000b preserves the corresponding element of the first operand. The fixed-up element from the first source operand or the preserved element in the first operand becomes the low quadword element of the destination operand (the first operand). Bits 127:64 of the destination operand is copied from the corresponding bits of the first source operand. The destination and first source operands are XMM registers. The second source operand can be a XMM register or a 64- bit memory location.
The two-level look-up table perform a fix-up of each DP FP input data in the first source operand by decoding the input data encoding into 8 token types. A response table is defined for each token type that converts the input encoding in the first source operand with one of 16 response actions.
This instruction is specifically intended for use in fixing up the results of arithmetic calculations involving one source so that they match the spec, although it is generally useful for fixing up the results of multiple-instruction sequences to reflect special-number inputs. For example, consider rcp(0). Input 0 to rcp, and you should get INF according to the DX10 spec. However, evaluating rcp via Newton-Raphson, where x=approx(1/0), yields an incorrect result. To deal with this, VFIXUPIMMPD can be used after the N-R reciprocal sequence to set the result to the correct value (i.e., INF when the input is 0).
If MXCSR.DAZ is not set, denormal input elements in the first source operand are considered as normal inputs and do not trigger any fixup nor fault reporting.
Imm8 is used to set the required flags reporting. It supports #ZE and #IE fault reporting (see details below).
", "tooltip": "Perform a fix-up of the low quadword element encoded in double-precision floating-point format in the first source operand (the second operand) using a 32-bit, two-level look-up table specified in the low quadword element of the second source operand (the third operand) with exception reporting specifier imm8. The element that is fixed-up is selected by mask bit of 1 specified in the opmask k1. Mask bit of 0 in the opmask k1 or table response action of 0000b preserves the corresponding element of the first operand. The fixed-up element from the first source operand or the preserved element in the first operand becomes the low quadword element of the destination operand (the first operand). Bits 127:64 of the destination operand is copied from the corresponding bits of the first source operand. The destination and first source operands are XMM registers. The second source operand can be a XMM register or a 64- bit memory location.", "url": "http://www.felixcloutier.com/x86/VFIXUPIMMSD.html" }; case "VFIXUPIMMSS": return { - "html": "Perform a fix-up of the low doubleword element encoded in single-precision floating-point format in the first source operand (the second operand) using a 32-bit, two-level look-up table specified in the low doubleword element of the second source operand (the third operand) with exception reporting specifier imm8. The element that is fixed-up is selected by mask bit of 1 specified in the opmask k1. Mask bit of 0 in the opmask k1 or table response action of 0000b preserves the corresponding element of the first operand. The fixed-up element from the first source operand or the preserved element in the first operand becomes the low doubleword element of the destination operand (the first operand) Bits 127:32 of the destination operand is copied from the corresponding bits of the first source operand. The destination and first source operands are XMM registers. 
The second source operand can be a XMM register or a 32-bit memory location.
The two-level look-up table perform a fix-up of each SP FP input data in the first source operand by decoding the input data encoding into 8 token types. A response table is defined for each token type that converts the input encoding in the first source operand with one of 16 response actions.
This instruction is specifically intended for use in fixing up the results of arithmetic calculations involving one source so that they match the spec, although it is generally useful for fixing up the results of multiple-instruction sequences to reflect special-number inputs. For example, consider rcp(0). Input 0 to rcp, and you should get INF according to the DX10 spec. However, evaluating rcp via Newton-Raphson, where x=approx(1/0), yields an incorrect result. To deal with this, VFIXUPIMMPD can be used after the N-R reciprocal sequence to set the result to the correct value (i.e. INF when the input is 0).
If MXCSR.DAZ is not set, denormal input elements in the first source operand are considered as normal inputs and do not trigger any fixup nor fault reporting.
Imm8 is used to set the required flags reporting. It supports #ZE and #IE fault reporting (see details below).
", + "html": "Perform a fix-up of the low doubleword element encoded in single-precision floating-point format in the first source operand (the second operand) using a 32-bit, two-level look-up table specified in the low doubleword element of the second source operand (the third operand) with exception reporting specifier imm8. The element that is fixed-up is selected by mask bit of 1 specified in the opmask k1. Mask bit of 0 in the opmask k1 or table response action of 0000b preserves the corresponding element of the first operand. The fixed-up element from the first source operand or the preserved element in the first operand becomes the low doubleword element of the destination operand (the first operand) Bits 127:32 of the destination operand is copied from the corresponding bits of the first source operand. The destination and first source operands are XMM registers. The second source operand can be a XMM register or a 32-bit memory location.
The two-level look-up table perform a fix-up of each SP FP input data in the first source operand by decoding the input data encoding into 8 token types. A response table is defined for each token type that converts the input encoding in the first source operand with one of 16 response actions.
This instruction is specifically intended for use in fixing up the results of arithmetic calculations involving one source so that they match the spec, although it is generally useful for fixing up the results of multiple-instruction sequences to reflect special-number inputs. For example, consider rcp(0). Input 0 to rcp, and you should get INF according to the DX10 spec. However, evaluating rcp via Newton-Raphson, where x=approx(1/0), yields an incorrect result. To deal with this, VFIXUPIMMPD can be used after the N-R reciprocal sequence to set the result to the correct value (i.e., INF when the input is 0).
If MXCSR.DAZ is not set, denormal input elements in the first source operand are considered as normal inputs and do not trigger any fixup nor fault reporting.
Imm8 is used to set the required flags reporting. It supports #ZE and #IE fault reporting (see details below).
", "tooltip": "Perform a fix-up of the low doubleword element encoded in single-precision floating-point format in the first source operand (the second operand) using a 32-bit, two-level look-up table specified in the low doubleword element of the second source operand (the third operand) with exception reporting specifier imm8. The element that is fixed-up is selected by mask bit of 1 specified in the opmask k1. Mask bit of 0 in the opmask k1 or table response action of 0000b preserves the corresponding element of the first operand. The fixed-up element from the first source operand or the preserved element in the first operand becomes the low doubleword element of the destination operand (the first operand) Bits 127:32 of the destination operand is copied from the corresponding bits of the first source operand. The destination and first source operands are XMM registers. The second source operand can be a XMM register or a 32-bit memory location.", "url": "http://www.felixcloutier.com/x86/VFIXUPIMMSS.html" }; @@ -4122,14 +4096,14 @@ export function getAsmOpcode(opcode) { case "VGETEXPSD": return { - "html": "Extracts the biased exponent from the normalized DP FP representation of the low qword data element of the source operand (the third operand) as unbiased signed integer value, or convert the denormal representation of input data to unbiased negative integer values. The integer value of the unbiased exponent is converted to double-precision FP value and written to the destination operand (the first operand) as DP FP numbers. Bits (127:64) of the XMM register destination are copied from corresponding bits in the first source operand.
The destination must be a XMM register, the source operand can be a XMM register or a float64 memory location. The low quadword element of the destination operand is conditionally updated with writemask k1.
Each GETEXP operation converts the exponent value into a FP number (permitting input value in denormal representation). Special cases of input values are listed in Table 5-14.
", + "html": "Extracts the biased exponent from the normalized DP FP representation of the low qword data element of the source operand (the third operand) as unbiased signed integer value, or convert the denormal representation of input data to unbiased negative integer values. The integer value of the unbiased exponent is converted to double-precision FP value and written to the destination operand (the first operand) as DP FP numbers. Bits (127:64) of the XMM register destination are copied from corresponding bits in the first source operand.
The destination must be a XMM register, the source operand can be a XMM register or a float64 memory location.
If writemasking is used, the low quadword element of the destination operand is conditionally updated depending on the value of writemask register k1. If writemasking is not used, the low quadword element of the destination operand is unconditionally updated.
Each GETEXP operation converts the exponent value into a FP number (permitting input value in denormal representation). Special cases of input values are listed in Table 5-14.
", "tooltip": "Extracts the biased exponent from the normalized DP FP representation of the low qword data element of the source operand (the third operand) as unbiased signed integer value, or convert the denormal representation of input data to unbiased negative integer values. The integer value of the unbiased exponent is converted to double-precision FP value and written to the destination operand (the first operand) as DP FP numbers. Bits (127:64) of the XMM register destination are copied from corresponding bits in the first source operand.", "url": "http://www.felixcloutier.com/x86/VGETEXPSD.html" }; case "VGETEXPSS": return { - "html": "Extracts the biased exponent from the normalized SP FP representation of the low doubleword data element of the source operand (the third operand) as unbiased signed integer value, or convert the denormal representation of input data to unbiased negative integer values. The integer value of the unbiased exponent is converted to single-precision FP value and written to the destination operand (the first operand) as SP FP numbers. Bits (127:32) of the XMM register destination are copied from corresponding bits in the first source operand.
The destination must be a XMM register, the source operand can be a XMM register or a float32 memory location. The the low doubleword element of the destination operand is conditionally updated with writemask k1.
Each GETEXP operation converts the exponent value into a FP number (permitting input value in denormal representation). Special cases of input values are listed in Table 5-15.
", + "html": "Extracts the biased exponent from the normalized SP FP representation of the low doubleword data element of the source operand (the third operand) as unbiased signed integer value, or convert the denormal representation of input data to unbiased negative integer values. The integer value of the unbiased exponent is converted to single-precision FP value and written to the destination operand (the first operand) as SP FP numbers. Bits (127:32) of the XMM register destination are copied from corresponding bits in the first source operand.
The destination must be a XMM register, the source operand can be a XMM register or a float32 memory location.
If writemasking is used, the low doubleword element of the destination operand is conditionally updated depending on the value of writemask register k1. If writemasking is not used, the low doubleword element of the destination operand is unconditionally updated.
Each GETEXP operation converts the exponent value into a FP number (permitting input value in denormal representation). Special cases of input values are listed in Table 5-15.
", "tooltip": "Extracts the biased exponent from the normalized SP FP representation of the low doubleword data element of the source operand (the third operand) as unbiased signed integer value, or convert the denormal representation of input data to unbiased negative integer values. The integer value of the unbiased exponent is converted to single-precision FP value and written to the destination operand (the first operand) as SP FP numbers. Bits (127:32) of the XMM register destination are copied from corresponding bits in the first source operand.", "url": "http://www.felixcloutier.com/x86/VGETEXPSS.html" }; @@ -4192,6 +4166,14 @@ export function getAsmOpcode(opcode) { "url": "http://www.felixcloutier.com/x86/VMASKMOV.html" }; + case "VP2INTERSECTD": + case "VP2INTERSECTQ": + return { + "html": "This instruction writes an even/odd pair of mask registers. The mask register destination indicated in the MODRM.REG field is used to form the basis of the register pair. The low bit of that field is masked off (set to zero) to create the first register of the pair.
EVEX.aaa and EVEX.z must be zero.
", + "tooltip": "This instruction writes an even/odd pair of mask registers. The mask register destination indicated in the MODRM.REG field is used to form the basis of the register pair. The low bit of that field is masked off (set to zero) to create the first register of the pair.", + "url": "http://www.felixcloutier.com/x86/VP2INTERSECTD%3AVP2INTERSECTQ.html" + }; + case "VP4DPWSSD": return { "html": "This instruction computes 4 sequential register source-block dot-products of two signed word operands with doubleword accumulation; see Figure 7-1 below. The memory operand is sequentially selected in each of the four steps.
In the above box, the notation of \u201c+3\u201d' is used to denote that the instruction accesses 4 source registers based on that operand; sources are consecutive, start in a multiple of 4 boundary, and contain the encoded register operand.
This instruction supports memory fault suppression. The entire memory operand is loaded if any bit of the lowest 16-bits of the mask is set to 1 or if a \u201cno masking\u201d encoding is used.
The tuple type Tuple1_4X implies that four 32-bit elements (16 bytes) are referenced by the memory operation portion of this instruction.
", @@ -4284,6 +4266,14 @@ export function getAsmOpcode(opcode) { "url": "http://www.felixcloutier.com/x86/VPCMPW%3AVPCMPUW.html" }; + case "VPCOMPRESSB": + case "VPCOMPRESSW": + return { + "html": "Compress (stores) up to 64 byte values or 32 word values from the source operand (second operand) to the destination operand (first operand), based on the active elements determined by the writemask operand. Note: EVEX.vvvv is reserved and must be 1111b otherwise instructions will #UD.
Moves up to 512 bits of packed byte values from the source operand (second operand) to the destination operand (first operand). This instruction is used to store partial contents of a vector register into a byte vector or single memory location using the active elements in operand writemask.
Memory destination version: Only the contiguous vector is written to the destination memory location. EVEX.z must be zero.
Register destination version: If the vector length of the contiguous vector is less than that of the input vector in the source operand, the upper bits of the destination register are unmodified if EVEX.z is not set, otherwise the upper bits are zeroed.
This instruction supports memory fault suppression.
", + "tooltip": "Compress (stores) up to 64 byte values or 32 word values from the source operand (second operand) to the destination operand (first operand), based on the active elements determined by the writemask operand. Note: EVEX.vvvv is reserved and must be 1111b otherwise instructions will #UD.", + "url": "http://www.felixcloutier.com/x86/VPCOMPRESSB%3AVCOMPRESSW.html" + }; + case "VPCOMPRESSD": return { "html": "Compress (store) up to 16/8/4 doubleword integer values from the source operand (second operand) to the destination operand (first operand). The source operand is a ZMM/YMM/XMM register, the destination operand can be a ZMM/YMM/XMM register or a 512/256/128-bit memory location.
The opmask register k1 selects the active elements (partial vector or possibly non-contiguous if less than 16 active elements) from the source operand to compress into a contiguous vector. The contiguous vector is written to the destination starting from the low element of the destination operand.
Memory destination version: Only the contiguous vector is written to the destination memory location. EVEX.z must be zero.
Register destination version: If the vector length of the contiguous vector is less than that of the input vector in the source operand, the upper bits of the destination register are unmodified if EVEX.z is not set, otherwise the upper bits are zeroed.
Note: EVEX.vvvv is reserved and must be 1111b otherwise instructions will #UD.
", @@ -4306,6 +4296,34 @@ export function getAsmOpcode(opcode) { "url": "http://www.felixcloutier.com/x86/VPCONFLICTD%3AVPCONFLICTQ.html" }; + case "VPDPBUSD": + return { + "html": "Multiplies the individual unsigned bytes of the first source operand by the corresponding signed bytes of the second source operand, producing intermediate signed word results. The word results are then summed and accumulated in the destination dword element size operand.
This instruction supports memory fault suppression.
", + "tooltip": "Multiplies the individual unsigned bytes of the first source operand by the corresponding signed bytes of the second source operand, producing intermediate signed word results. The word results are then summed and accumulated in the destination dword element size operand.", + "url": "http://www.felixcloutier.com/x86/VPDPBUSD.html" + }; + + case "VPDPBUSDS": + return { + "html": "Multiplies the individual unsigned bytes of the first source operand by the corresponding signed bytes of the second source operand, producing intermediate signed word results. The word results are then summed and accumulated in the destination dword element size operand. If the intermediate sum overflows a 32b signed number the result is saturated to either 0x7FFF_FFFF for positive numbers of 0x8000_0000 for negative numbers.
This instruction supports memory fault suppression.
", + "tooltip": "Multiplies the individual unsigned bytes of the first source operand by the corresponding signed bytes of the second source operand, producing intermediate signed word results. The word results are then summed and accumulated in the destination dword element size operand. If the intermediate sum overflows a 32b signed number the result is saturated to either 0x7FFF_FFFF for positive numbers of 0x8000_0000 for negative numbers.", + "url": "http://www.felixcloutier.com/x86/VPDPBUSDS.html" + }; + + case "VPDPWSSD": + return { + "html": "Multiplies the individual signed words of the first source operand by the corresponding signed words of the second source operand, producing intermediate signed, doubleword results. The adjacent doubleword results are then summed and accumulated in the destination operand.
This instruction supports memory fault suppression.
", + "tooltip": "Multiplies the individual signed words of the first source operand by the corresponding signed words of the second source operand, producing intermediate signed, doubleword results. The adjacent doubleword results are then summed and accumulated in the destination operand.", + "url": "http://www.felixcloutier.com/x86/VPDPWSSD.html" + }; + + case "VPDPWSSDS": + return { + "html": "Multiplies the individual signed words of the first source operand by the corresponding signed words of the second source operand, producing intermediate signed, doubleword results. The adjacent doubleword results are then summed and accumulated in the destination operand. If the intermediate sum overflows a 32b signed number, the result is saturated to either 0x7FFF_FFFF for positive numbers of 0x8000_0000 for negative numbers.
This instruction supports memory fault suppression.
", + "tooltip": "Multiplies the individual signed words of the first source operand by the corresponding signed words of the second source operand, producing intermediate signed, doubleword results. The adjacent doubleword results are then summed and accumulated in the destination operand. If the intermediate sum overflows a 32b signed number, the result is saturated to either 0x7FFF_FFFF for positive numbers of 0x8000_0000 for negative numbers.", + "url": "http://www.felixcloutier.com/x86/VPDPWSSDS.html" + }; + case "VPERM2F128": return { "html": "Permute 128 bit floating-point-containing fields from the first source operand (second operand) and second source operand (third operand) using bits in the 8-bit immediate and store results in the destination operand (first operand). The first source operand is a YMM register, the second source operand is a YMM register or a 256-bit memory location, and the destination operand is a YMM register.
Imm8[1:0] select the source for the first destination 128-bit field, imm8[5:4] select the source for the second destination field. If imm8[3] is set, the low 128-bit field is zeroed. If imm8[7] is set, the high 128-bit field is zeroed.
VEX.L must be 1, otherwise the instruction will #UD.
", @@ -4406,6 +4424,14 @@ export function getAsmOpcode(opcode) { "url": "http://www.felixcloutier.com/x86/VPERMT2W%3AVPERMT2D%3AVPERMT2Q%3AVPERMT2PS%3AVPERMT2PD.html" }; + case "VPEXPANDB": + case "VPEXPANDW": + return { + "html": "Expands (loads) up to 64 byte integer values or 32 word integer values from the source operand (memory operand) to the destination operand (register operand), based on the active elements determined by the writemask operand.
Note: EVEX.vvvv is reserved and must be 1111b otherwise instructions will #UD.
Moves 128, 256 or 512 bits of packed byte integer values from the source operand (memory operand) to the destination operand (register operand). This instruction is used to load from an int8 vector register or memory location while inserting the data into sparse elements of destination vector register using the active elements pointed out by the operand writemask.
This instruction supports memory fault suppression.
Note that the compressed displacement assumes a pre-scaling (N) corresponding to the size of one single element instead of the size of the full vector.
", + "tooltip": "Expands (loads) up to 64 byte integer values or 32 word integer values from the source operand (memory operand) to the destination operand (register operand), based on the active elements determined by the writemask operand.", + "url": "http://www.felixcloutier.com/x86/VPEXPANDB%3AVPEXPANDW.html" + }; + case "VPEXPANDD": return { "html": "Expand (load) up to 16 contiguous doubleword integer values of the input vector in the source operand (the second operand) to sparse elements in the destination operand (the first operand), selected by the writemask k1. The destination operand is a ZMM register, the source operand can be a ZMM register or memory location.
The input vector starts from the lowest element in the source operand. The opmask register k1 selects the destination elements (a partial vector or sparse elements if less than 8 elements) to be replaced by the ascending elements in the input vector. Destination elements not selected by the writemask k1 are either unmodified or zeroed, depending on EVEX.z.
Note: EVEX.vvvv is reserved and must be 1111b otherwise instructions will #UD.
Note that the compressed displacement assumes a pre-scaling (N) corresponding to the size of one single element instead of the size of the full vector.
", @@ -4547,6 +4573,16 @@ export function getAsmOpcode(opcode) { "url": "http://www.felixcloutier.com/x86/VPMULTISHIFTQB.html" }; + case "VPOPCNTB": + case "VPOPCNTD": + case "VPOPCNTQ": + case "VPOPCNTW": + return { + "html": "This instruction counts the number of bits set to one in each byte, word, dword or qword element of its source (e.g., zmm2 or memory) and places the results in the destination register (zmm1). This instruction supports memory fault suppression.
", + "tooltip": "This instruction counts the number of bits set to one in each byte, word, dword or qword element of its source (e.g., zmm2 or memory) and places the results in the destination register (zmm1). This instruction supports memory fault suppression.", + "url": "http://www.felixcloutier.com/x86/VPOPCNT.html" + }; + case "VPROLD": case "VPROLQ": case "VPROLVD": @@ -4577,6 +4613,49 @@ export function getAsmOpcode(opcode) { "url": "http://www.felixcloutier.com/x86/VPSCATTERDD%3AVPSCATTERDQ%3AVPSCATTERQD%3AVPSCATTERQQ.html" }; + case "VPSHLDD": + case "VPSHLDQ": + case "VPSHLDW": + return { + "html": "Concatenate packed data, extract result shifted to the left by constant value.
This instruction supports memory fault suppression.
", + "tooltip": "Concatenate packed data, extract result shifted to the left by constant value.", + "url": "http://www.felixcloutier.com/x86/VPSHLD.html" + }; + + case "VPSHLDVD": + case "VPSHLDVQ": + case "VPSHLDVW": + return { + "html": "Concatenate packed data, extract result shifted to the left by variable value.
This instruction supports memory fault suppression.
", + "tooltip": "Concatenate packed data, extract result shifted to the left by variable value.", + "url": "http://www.felixcloutier.com/x86/VPSHLDV.html" + }; + + case "VPSHRDD": + case "VPSHRDQ": + case "VPSHRDW": + return { + "html": "Concatenate packed data, extract result shifted to the right by constant value.
This instruction supports memory fault suppression.
", + "tooltip": "Concatenate packed data, extract result shifted to the right by constant value.", + "url": "http://www.felixcloutier.com/x86/VPSHRD.html" + }; + + case "VPSHRDVD": + case "VPSHRDVQ": + case "VPSHRDVW": + return { + "html": "Concatenate packed data, extract result shifted to the right by variable value.
This instruction supports memory fault suppression.
", + "tooltip": "Concatenate packed data, extract result shifted to the right by variable value.", + "url": "http://www.felixcloutier.com/x86/VPSHRDV.html" + }; + + case "VPSHUFBITQMB": + return { + "html": "The VPSHUFBITQMB instruction performs a bit gather select using second source as control and first source as data. Each bit uses 6 control bits (2nd source operand) to select which data bit is going to be gathered (first source operand). A given bit can only access 64 different bits of data (first 64 destination bits can access first 64 data bits, second 64 destination bits can access second 64 data bits, etc.).
Control data for each output bit is stored in 8 bit elements of SRC2, but only the 6 least significant bits of each element are used.
This instruction uses write masking (zeroing only). This instruction supports memory fault suppression.
The first source operand is a ZMM register. The second source operand is a ZMM register or a memory location. The destination operand is a mask register.
", + "tooltip": "The VPSHUFBITQMB instruction performs a bit gather select using second source as control and first source as data. Each bit uses 6 control bits (2nd source operand) to select which data bit is going to be gathered (first source operand). A given bit can only access 64 different bits of data (first 64 destination bits can access first 64 data bits, second 64 destination bits can access second 64 data bits, etc.).", + "url": "http://www.felixcloutier.com/x86/VPSHUFBITQMB.html" + }; + case "VPSLLVD": case "VPSLLVQ": case "VPSLLVW": @@ -4662,28 +4741,28 @@ export function getAsmOpcode(opcode) { case "VRCP14PD": return { - "html": "This instruction performs a SIMD computation of the approximate reciprocals of eight/four/two packed double-precision floating-point values in the source operand (the second operand) and stores the packed double-precision floating-point results in the destination operand. The maximum relative error for this approximation is less than 2-14.
The source operand can be a ZMM register, a 512-bit memory location, or a 512-bit vector broadcasted from a 64-bit memory location. The destination operand is a ZMM register conditionally updated according to the writemask.
The VRCP14PD instruction is not affected by the rounding control bits in the MXCSR register. When a source value is a 0.0, an \u221e with the sign of the source value is returned. A denormal source value will be treated as zero only in case of DAZ bit set in MXCSR. Otherwise it is treated correctly (i.e. not as a 0.0). Underflow results are flushed to zero only in case of FTZ bit set in MXCSR. Otherwise it will be treated correctly (i.e. correct underflow result is written) with the sign of the operand. When a source value is a SNaN or QNaN, the SNaN is converted to a QNaN or the source QNaN is returned.
EVEX.vvvv is reserved and must be 1111b otherwise instructions will #UD.
MXCSR exception flags are not affected by this instruction and floating-point exceptions are not reported.
", + "html": "This instruction performs a SIMD computation of the approximate reciprocals of eight/four/two packed double-precision floating-point values in the source operand (the second operand) and stores the packed double-precision floating-point results in the destination operand. The maximum relative error for this approximation is less than 2-14.
The source operand can be a ZMM register, a 512-bit memory location, or a 512-bit vector broadcasted from a 64-bit memory location. The destination operand is a ZMM register conditionally updated according to the writemask.
The VRCP14PD instruction is not affected by the rounding control bits in the MXCSR register. When a source value is a 0.0, an \u221e with the sign of the source value is returned. A denormal source value will be treated as zero only in case of DAZ bit set in MXCSR. Otherwise it is treated correctly (i.e., not as a 0.0). Underflow results are flushed to zero only in case of FTZ bit set in MXCSR. Otherwise it will be treated correctly (i.e., correct underflow result is written) with the sign of the operand. When a source value is a SNaN or QNaN, the SNaN is converted to a QNaN or the source QNaN is returned.
EVEX.vvvv is reserved and must be 1111b otherwise instructions will #UD.
MXCSR exception flags are not affected by this instruction and floating-point exceptions are not reported.
", "tooltip": "This instruction performs a SIMD computation of the approximate reciprocals of eight/four/two packed double-precision floating-point values in the source operand (the second operand) and stores the packed double-precision floating-point results in the destination operand. The maximum relative error for this approximation is less than 2-14.", "url": "http://www.felixcloutier.com/x86/VRCP14PD.html" }; case "VRCP14PS": return { - "html": "This instruction performs a SIMD computation of the approximate reciprocals of the packed single-precision floating-point values in the source operand (the second operand) and stores the packed single-precision floating-point results in the destination operand (the first operand). The maximum relative error for this approximation is less than 2-14.
The source operand can be a ZMM register, a 512-bit memory location or a 512-bit vector broadcasted from a 32-bit memory location. The destination operand is a ZMM register conditionally updated according to the writemask.
The VRCP14PS instruction is not affected by the rounding control bits in the MXCSR register. When a source value is a 0.0, an \u221e with the sign of the source value is returned. A denormal source value will be treated as zero only in case of DAZ bit set in MXCSR. Otherwise it is treated correctly (i.e. not as a 0.0). Underflow results are flushed to zero only in case of FTZ bit set in MXCSR. Otherwise it will be treated correctly (i.e. correct underflow result is written) with the sign of the operand. When a source value is a SNaN or QNaN, the SNaN is converted to a QNaN or the source QNaN is returned.
EVEX.vvvv is reserved and must be 1111b otherwise instructions will #UD.
MXCSR exception flags are not affected by this instruction and floating-point exceptions are not reported.
", + "html": "This instruction performs a SIMD computation of the approximate reciprocals of the packed single-precision floating-point values in the source operand (the second operand) and stores the packed single-precision floating-point results in the destination operand (the first operand). The maximum relative error for this approximation is less than 2-14.
The source operand can be a ZMM register, a 512-bit memory location or a 512-bit vector broadcasted from a 32-bit memory location. The destination operand is a ZMM register conditionally updated according to the writemask.
The VRCP14PS instruction is not affected by the rounding control bits in the MXCSR register. When a source value is a 0.0, an \u221e with the sign of the source value is returned. A denormal source value will be treated as zero only in case of DAZ bit set in MXCSR. Otherwise it is treated correctly (i.e., not as a 0.0). Underflow results are flushed to zero only in case of FTZ bit set in MXCSR. Otherwise it will be treated correctly (i.e., correct underflow result is written) with the sign of the operand. When a source value is a SNaN or QNaN, the SNaN is converted to a QNaN or the source QNaN is returned.
EVEX.vvvv is reserved and must be 1111b otherwise instructions will #UD.
MXCSR exception flags are not affected by this instruction and floating-point exceptions are not reported.
", "tooltip": "This instruction performs a SIMD computation of the approximate reciprocals of the packed single-precision floating-point values in the source operand (the second operand) and stores the packed single-precision floating-point results in the destination operand (the first operand). The maximum relative error for this approximation is less than 2-14.", "url": "http://www.felixcloutier.com/x86/VRCP14PS.html" }; case "VRCP14SD": return { - "html": "This instruction performs a SIMD computation of the approximate reciprocal of the low double-precision floating-point value in the second source operand (the third operand) stores the result in the low quadword element of the destination operand (the first operand) according to the writemask k1. Bits (127:64) of the XMM register destination are copied from corresponding bits in the first source operand (the second operand). The maximum relative error for this approximation is less than 2-14. The source operand can be an XMM register or a 64-bit memory location. The destination operand is an XMM register.
The VRCP14SD instruction is not affected by the rounding control bits in the MXCSR register. When a source value is a 0.0, an \u221e with the sign of the source value is returned. A denormal source value will be treated as zero only in case of DAZ bit set in MXCSR. Otherwise it is treated correctly (i.e. not as a 0.0). Underflow results are flushed to zero only in case of FTZ bit set in MXCSR. Otherwise it will be treated correctly (i.e. correct underflow result is written) with the sign of the operand. When a source value is a SNaN or QNaN, the SNaN is converted to a QNaN or the source QNaN is returned. See Table 5-22 for special-case input values.
MXCSR exception flags are not affected by this instruction and floating-point exceptions are not reported.
A numerically exact implementation of VRCP14xx can be found at:
", + "html": "This instruction performs a SIMD computation of the approximate reciprocal of the low double-precision floating-point value in the second source operand (the third operand) stores the result in the low quadword element of the destination operand (the first operand) according to the writemask k1. Bits (127:64) of the XMM register destination are copied from corresponding bits in the first source operand (the second operand). The maximum relative error for this approximation is less than 2-14. The source operand can be an XMM register or a 64-bit memory location. The destination operand is an XMM register.
The VRCP14SD instruction is not affected by the rounding control bits in the MXCSR register. When a source value is a 0.0, an \u221e with the sign of the source value is returned. A denormal source value will be treated as zero only in case of DAZ bit set in MXCSR. Otherwise it is treated correctly (i.e., not as a 0.0). Underflow results are flushed to zero only in case of FTZ bit set in MXCSR. Otherwise it will be treated correctly (i.e., correct underflow result is written) with the sign of the operand. When a source value is a SNaN or QNaN, the SNaN is converted to a QNaN or the source QNaN is returned. See Table 5-22 for special-case input values.
MXCSR exception flags are not affected by this instruction and floating-point exceptions are not reported.
A numerically exact implementation of VRCP14xx can be found at:
", "tooltip": "This instruction performs a SIMD computation of the approximate reciprocal of the low double-precision floating-point value in the second source operand (the third operand) stores the result in the low quadword element of the destination operand (the first operand) according to the writemask k1. Bits (127:64) of the XMM register destination are copied from corresponding bits in the first source operand (the second operand). The maximum relative error for this approximation is less than 2-14. The source operand can be an XMM register or a 64-bit memory location. The destination operand is an XMM register.", "url": "http://www.felixcloutier.com/x86/VRCP14SD.html" }; case "VRCP14SS": return { - "html": "This instruction performs a SIMD computation of the approximate reciprocal of the low single-precision floating-point value in the second source operand (the third operand) and stores the result in the low quadword element of the destination operand (the first operand) according to the writemask k1. Bits (127:32) of the XMM register destination are copied from corresponding bits in the first source operand (the second operand). The maximum relative error for this approximation is less than 2-14. The source operand can be an XMM register or a 32-bit memory location. The destination operand is an XMM register.
The VRCP14SS instruction is not affected by the rounding control bits in the MXCSR register. When a source value is a 0.0, an \u221e with the sign of the source value is returned. A denormal source value will be treated as zero only in case of DAZ bit set in MXCSR. Otherwise it is treated correctly (i.e. not as a 0.0). Underflow results are flushed to zero only in case of FTZ bit set in MXCSR. Otherwise it will be treated correctly (i.e. correct underflow result is written) with the sign of the operand. When a source value is a SNaN or QNaN, the SNaN is converted to a QNaN or the source QNaN is returned. See Table 5-23 for special-case input values.
MXCSR exception flags are not affected by this instruction and floating-point exceptions are not reported.
", + "html": "This instruction performs a SIMD computation of the approximate reciprocal of the low single-precision floating-point value in the second source operand (the third operand) and stores the result in the low quadword element of the destination operand (the first operand) according to the writemask k1. Bits (127:32) of the XMM register destination are copied from corresponding bits in the first source operand (the second operand). The maximum relative error for this approximation is less than 2-14. The source operand can be an XMM register or a 32-bit memory location. The destination operand is an XMM register.
The VRCP14SS instruction is not affected by the rounding control bits in the MXCSR register. When a source value is a 0.0, an \u221e with the sign of the source value is returned. A denormal source value will be treated as zero only in case of DAZ bit set in MXCSR. Otherwise it is treated correctly (i.e., not as a 0.0). Underflow results are flushed to zero only in case of FTZ bit set in MXCSR. Otherwise it will be treated correctly (i.e., correct underflow result is written) with the sign of the operand. When a source value is a SNaN or QNaN, the SNaN is converted to a QNaN or the source QNaN is returned. See Table 5-23 for special-case input values.
MXCSR exception flags are not affected by this instruction and floating-point exceptions are not reported.
", "tooltip": "This instruction performs a SIMD computation of the approximate reciprocal of the low single-precision floating-point value in the second source operand (the third operand) and stores the result in the low quadword element of the destination operand (the first operand) according to the writemask k1. Bits (127:32) of the XMM register destination are copied from corresponding bits in the first source operand (the second operand). The maximum relative error for this approximation is less than 2-14. The source operand can be an XMM register or a 32-bit memory location. The destination operand is an XMM register.", "url": "http://www.felixcloutier.com/x86/VRCP14SS.html" }; @@ -4830,29 +4909,29 @@ export function getAsmOpcode(opcode) { case "VSCALEFPD": return { - "html": "Performs a floating-point scale of the packed double-precision floating-point values in the first source operand by multiplying it by 2 power of the double-precision floating-point values in second source operand.
The equation of this operation is given by:
zmm1 := zmm2*2floor(zmm3).
Floor(zmm3) means maximum integer value \u2264 zmm3.
If the result cannot be represented in double precision, then the proper overflow response (for positive scaling operand), or the proper underflow response (for negative scaling operand) is issued. The overflow and underflow responses are dependent on the rounding mode (for IEEE-compliant rounding), as well as on other settings in MXCSR (exception mask bits, FTZ bit), and on the SAE bit.
", - "tooltip": "Performs a floating-point scale of the packed double-precision floating-point values in the first source operand by multiplying it by 2 power of the double-precision floating-point values in second source operand.", + "html": "Performs a floating-point scale of the packed double-precision floating-point values in the first source operand by multiplying them by 2 to the power of the double-precision floating-point values in second source operand.
The equation of this operation is given by:
zmm1 := zmm2*2floor(zmm3).
Floor(zmm3) means maximum integer value \u2264 zmm3.
If the result cannot be represented in double precision, then the proper overflow response (for positive scaling operand), or the proper underflow response (for negative scaling operand) is issued. The overflow and underflow responses are dependent on the rounding mode (for IEEE-compliant rounding), as well as on other settings in MXCSR (exception mask bits, FTZ bit), and on the SAE bit.
", + "tooltip": "Performs a floating-point scale of the packed double-precision floating-point values in the first source operand by multiplying them by 2 to the power of the double-precision floating-point values in second source operand.", "url": "http://www.felixcloutier.com/x86/VSCALEFPD.html" }; case "VSCALEFPS": return { - "html": "Performs a floating-point scale of the packed single-precision floating-point values in the first source operand by multiplying it by 2 power of the float32 values in second source operand.
The equation of this operation is given by:
zmm1 := zmm2*2floor(zmm3).
Floor(zmm3) means maximum integer value \u2264 zmm3.
If the result cannot be represented in single precision, then the proper overflow response (for positive scaling operand), or the proper underflow response (for negative scaling operand) is issued. The overflow and underflow responses are dependent on the rounding mode (for IEEE-compliant rounding), as well as on other settings in MXCSR (exception mask bits, FTZ bit), and on the SAE bit.
", - "tooltip": "Performs a floating-point scale of the packed single-precision floating-point values in the first source operand by multiplying it by 2 power of the float32 values in second source operand.", + "html": "Performs a floating-point scale of the packed single-precision floating-point values in the first source operand by multiplying them by 2 to the power of the float32 values in second source operand.
The equation of this operation is given by:
zmm1 := zmm2*2floor(zmm3).
Floor(zmm3) means maximum integer value \u2264 zmm3.
If the result cannot be represented in single precision, then the proper overflow response (for positive scaling operand), or the proper underflow response (for negative scaling operand) is issued. The overflow and underflow responses are dependent on the rounding mode (for IEEE-compliant rounding), as well as on other settings in MXCSR (exception mask bits, FTZ bit), and on the SAE bit.
", + "tooltip": "Performs a floating-point scale of the packed single-precision floating-point values in the first source operand by multiplying them by 2 to the power of the float32 values in second source operand.", "url": "http://www.felixcloutier.com/x86/VSCALEFPS.html" }; case "VSCALEFSD": return { - "html": "Performs a floating-point scale of the packed double-precision floating-point value in the first source operand by multiplying it by 2 power of the double-precision floating-point value in second source operand.
The equation of this operation is given by:
xmm1 := xmm2*2floor(xmm3).
Floor(xmm3) means maximum integer value \u2264 xmm3.
If the result cannot be represented in double precision, then the proper overflow response (for positive scaling operand), or the proper underflow response (for negative scaling operand) is issued. The overflow and underflow responses are dependent on the rounding mode (for IEEE-compliant rounding), as well as on other settings in MXCSR (exception mask bits, FTZ bit), and on the SAE bit.
", - "tooltip": "Performs a floating-point scale of the packed double-precision floating-point value in the first source operand by multiplying it by 2 power of the double-precision floating-point value in second source operand.", + "html": "Performs a floating-point scale of the scalar double-precision floating-point value in the first source operand by multiplying it by 2 to the power of the double-precision floating-point value in second source operand.
The equation of this operation is given by:
xmm1 := xmm2*2floor(xmm3).
Floor(xmm3) means maximum integer value \u2264 xmm3.
If the result cannot be represented in double precision, then the proper overflow response (for positive scaling operand), or the proper underflow response (for negative scaling operand) is issued. The overflow and underflow responses are dependent on the rounding mode (for IEEE-compliant rounding), as well as on other settings in MXCSR (exception mask bits, FTZ bit), and on the SAE bit.
", + "tooltip": "Performs a floating-point scale of the scalar double-precision floating-point value in the first source operand by multiplying it by 2 to the power of the double-precision floating-point value in second source operand.", "url": "http://www.felixcloutier.com/x86/VSCALEFSD.html" }; case "VSCALEFSS": return { - "html": "Performs a floating-point scale of the scalar single-precision floating-point value in the first source operand by multiplying it by 2 power of the float32 value in second source operand.
The equation of this operation is given by:
xmm1 := xmm2*2floor(xmm3).
Floor(xmm3) means maximum integer value \u2264 xmm3.
If the result cannot be represented in single precision, then the proper overflow response (for positive scaling operand), or the proper underflow response (for negative scaling operand) is issued. The overflow and underflow responses are dependent on the rounding mode (for IEEE-compliant rounding), as well as on other settings in MXCSR (exception mask bits, FTZ bit), and on the SAE bit.
", - "tooltip": "Performs a floating-point scale of the scalar single-precision floating-point value in the first source operand by multiplying it by 2 power of the float32 value in second source operand.", + "html": "Performs a floating-point scale of the scalar single-precision floating-point value in the first source operand by multiplying it by 2 to the power of the float32 value in second source operand.
The equation of this operation is given by:
xmm1 := xmm2*2floor(xmm3).
Floor(xmm3) means maximum integer value \u2264 xmm3.
If the result cannot be represented in single precision, then the proper overflow response (for positive scaling operand), or the proper underflow response (for negative scaling operand) is issued. The overflow and underflow responses are dependent on the rounding mode (for IEEE-compliant rounding), as well as on other settings in MXCSR (exception mask bits, FTZ bit), and on the SAE bit.
", + "tooltip": "Performs a floating-point scale of the scalar single-precision floating-point value in the first source operand by multiplying it by 2 to the power of the float32 value in second source operand.", "url": "http://www.felixcloutier.com/x86/VSCALEFSS.html" }; @@ -4906,15 +4985,15 @@ export function getAsmOpcode(opcode) { case "VZEROALL": return { - "html": "The instruction zeros contents of all XMM or YMM registers.
Note: VEX.vvvv is reserved and must be 1111b, otherwise instructions will #UD. In Compatibility and legacy 32-bit mode only the lower 8 registers are modified.
", - "tooltip": "The instruction zeros contents of all XMM or YMM registers.", + "html": "In 64-bit mode, the instruction zeroes XMM0-XMM15, YMM0-YMM15, and ZMM0-ZMM15. Outside 64-bit mode, it zeroes only XMM0-XMM7, YMM0-YMM7, and ZMM0-ZMM7. VZEROALL does not modify ZMM16-ZMM31.
Note: VEX.vvvv is reserved and must be 1111b, otherwise instructions will #UD. In Compatibility and legacy 32-bit mode only the lower 8 registers are modified.
", + "tooltip": "In 64-bit mode, the instruction zeroes XMM0-XMM15, YMM0-YMM15, and ZMM0-ZMM15. Outside 64-bit mode, it zeroes only XMM0-XMM7, YMM0-YMM7, and ZMM0-ZMM7. VZEROALL does not modify ZMM16-ZMM31.", "url": "http://www.felixcloutier.com/x86/VZEROALL.html" }; case "VZEROUPPER": return { - "html": "The instruction zeros the bits in position 128 and higher of all YMM registers. The lower 128-bits of the registers (the corresponding XMM registers) are unmodified.
This instruction is recommended when transitioning between AVX and legacy SSE code - it will eliminate performance penalties caused by false dependencies.
Note: VEX.vvvv is reserved and must be 1111b otherwise instructions will #UD. In Compatibility and legacy 32-bit mode only the lower 8 registers are modified.
", - "tooltip": "The instruction zeros the bits in position 128 and higher of all YMM registers. The lower 128-bits of the registers (the corresponding XMM registers) are unmodified.", + "html": "In 64-bit mode, the instruction zeroes the bits in positions 128 and higher in YMM0-YMM15 and ZMM0-ZMM15. Outside 64-bit mode, it zeroes those bits only in YMM0-YMM7 and ZMM0-ZMM7. VZEROUPPER does not modify the lower 128 bits of these registers and it does not modify ZMM16-ZMM31.
This instruction is recommended when transitioning between AVX and legacy SSE code; it will eliminate performance penalties caused by false dependencies.
Note: VEX.vvvv is reserved and must be 1111b otherwise instructions will #UD. In Compatibility and legacy 32-bit mode only the lower 8 registers are modified.
", + "tooltip": "In 64-bit mode, the instruction zeroes the bits in positions 128 and higher in YMM0-YMM15 and ZMM0-ZMM15. Outside 64-bit mode, it zeroes those bits only in YMM0-YMM7 and ZMM0-ZMM7. VZEROUPPER does not modify the lower 128 bits of these registers and it does not modify ZMM16-ZMM31.", "url": "http://www.felixcloutier.com/x86/VZEROUPPER.html" }; @@ -4933,6 +5012,13 @@ export function getAsmOpcode(opcode) { "url": "http://www.felixcloutier.com/x86/WBINVD.html" }; + case "WBNOINVD": + return { + "html": "The WBNOINVD instruction writes back all modified cache lines in the processor\u2019s internal cache to main memory but does not invalidate (flush) the internal caches.
After executing this instruction, the processor does not wait for the external caches to complete their write-back operation before proceeding with instruction execution. It is the responsibility of hardware to respond to the cache write-back signal. The amount of time or cycles for WBNOINVD to complete will vary due to size and other factors of different cache hierarchies. As a consequence, the use of the WBNOINVD instruction can have an impact on logical processor interrupt/event response time.
The WBNOINVD instruction is a privileged instruction. When the processor is running in protected mode, the CPL of a program or procedure must be 0 to execute this instruction. This instruction is also a serializing instruction (see \u201cSerializing Instructions\u201d in Chapter 8 of the Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 3A).
This instruction\u2019s operation is the same in non-64-bit modes and 64-bit mode.
", + "tooltip": "The WBNOINVD instruction writes back all modified cache lines in the processor\u2019s internal cache to main memory but does not invalidate (flush) the internal caches.", + "url": "http://www.felixcloutier.com/x86/WBNOINVD.html" + }; + case "WRFSBASE": case "WRGSBASE": return { @@ -4950,7 +5036,7 @@ export function getAsmOpcode(opcode) { case "WRPKRU": return { - "html": "Writes the value of EAX into PKRU. ECX and EDX must be 0 when WRPKRU is executed; otherwise, a general-protection exception (#GP) occurs.
WRPKRU can be executed only if CR4.PKE = 1; otherwise, an invalid-opcode exception (#UD) occurs. Software can discover the value of CR4.PKE by examining CPUID.(EAX=07H,ECX=0H):ECX.OSPKE [bit 4].
On processors that support the Intel 64 Architecture, the high-order 32-bits of RCX, RDX and RAX are ignored.
", + "html": "Writes the value of EAX into PKRU. ECX and EDX must be 0 when WRPKRU is executed; otherwise, a general-protection exception (#GP) occurs.
WRPKRU can be executed only if CR4.PKE = 1; otherwise, an invalid-opcode exception (#UD) occurs. Software can discover the value of CR4.PKE by examining CPUID.(EAX=07H,ECX=0H):ECX.OSPKE [bit 4].
On processors that support the Intel 64 Architecture, the high-order 32-bits of RCX, RDX and RAX are ignored.
WRPKRU will never execute speculatively. Memory accesses affected by PKRU register will not execute (even speculatively) until all prior executions of WRPKRU have completed execution and updated the PKRU register.
", "tooltip": "Writes the value of EAX into PKRU. ECX and EDX must be 0 when WRPKRU is executed; otherwise, a general-protection exception (#GP) occurs.", "url": "http://www.felixcloutier.com/x86/WRPKRU.html" }; @@ -4979,8 +5065,8 @@ export function getAsmOpcode(opcode) { case "XBEGIN": return { - "html": "The XBEGIN instruction specifies the start of an RTM code region. If the logical processor was not already in transactional execution, then the XBEGIN instruction causes the logical processor to transition into transactional execution. The XBEGIN instruction that transitions the logical processor into transactional execution is referred to as the outermost XBEGIN instruction. The instruction also specifies a relative offset to compute the address of the fallback code path following a transactional abort.
On an RTM abort, the logical processor discards all architectural register and memory updates performed during the RTM execution and restores architectural state to that corresponding to the outermost XBEGIN instruction. The fallback address following an abort is computed from the outermost XBEGIN instruction.
", - "tooltip": "The XBEGIN instruction specifies the start of an RTM code region. If the logical processor was not already in transactional execution, then the XBEGIN instruction causes the logical processor to transition into transactional execution. The XBEGIN instruction that transitions the logical processor into transactional execution is referred to as the outermost XBEGIN instruction. The instruction also specifies a relative offset to compute the address of the fallback code path following a transactional abort.", + "html": "The XBEGIN instruction specifies the start of an RTM code region. If the logical processor was not already in transactional execution, then the XBEGIN instruction causes the logical processor to transition into transactional execution. The XBEGIN instruction that transitions the logical processor into transactional execution is referred to as the outermost XBEGIN instruction. The instruction also specifies a relative offset to compute the address of the fallback code path following a transactional abort. (Use of the 16-bit operand size does not cause this address to be truncated to 16 bits, unlike a near jump to a relative offset.)
On an RTM abort, the logical processor discards all architectural register and memory updates performed during the RTM execution and restores architectural state to that corresponding to the outermost XBEGIN instruction. The fallback address following an abort is computed from the outermost XBEGIN instruction.
", + "tooltip": "The XBEGIN instruction specifies the start of an RTM code region. If the logical processor was not already in transactional execution, then the XBEGIN instruction causes the logical processor to transition into transactional execution. The XBEGIN instruction that transitions the logical processor into transactional execution is referred to as the outermost XBEGIN instruction. The instruction also specifies a relative offset to compute the address of the fallback code path following a transactional abort. (Use of the 16-bit operand size does not cause this address to be truncated to 16 bits, unlike a near jump to a relative offset.)", "url": "http://www.felixcloutier.com/x86/XBEGIN.html" }; @@ -5023,8 +5109,8 @@ export function getAsmOpcode(opcode) { case "VXORPD": case "XORPD": return { - "html": "Performs a bitwise logical XOR of the two, four or eight packed double-precision floating-point values from the first source operand and the second source operand, and stores the result in the destination operand
EVEX.512 encoded version: The first source operand is a ZMM register. The second source operand can be a ZMM register or a vector memory location. The destination operand is a ZMM register conditionally updated with writemask k1.
VEX.256 and EVEX.256 encoded versions: The first source operand is a YMM register. The second source operand is a YMM register or a 256-bit memory location. The destination operand is a YMM register (conditionally updated with writemask k1 in case of EVEX). The upper bits (MAXVL-1:256) of the corresponding ZMM register destination are zeroed.
VEX.128 and EVEX.128 encoded versions: The first source operand is an XMM register. The second source operand is an XMM register or 128-bit memory location. The destination operand is an XMM register (conditionally updated with writemask k1 in case of EVEX). The upper bits (MAXVL-1:128) of the corresponding ZMM register destination are zeroed.
128-bit Legacy SSE version: The second source can be an XMM register or an 128-bit memory location. The destination is not distinct from the first source XMM register and the upper bits (MAXVL-1:128) of the corresponding register destination are unmodified.
", - "tooltip": "Performs a bitwise logical XOR of the two, four or eight packed double-precision floating-point values from the first source operand and the second source operand, and stores the result in the destination operand", + "html": "Performs a bitwise logical XOR of the two, four or eight packed double-precision floating-point values from the first source operand and the second source operand, and stores the result in the destination operand.
EVEX.512 encoded version: The first source operand is a ZMM register. The second source operand can be a ZMM register or a vector memory location. The destination operand is a ZMM register conditionally updated with writemask k1.
VEX.256 and EVEX.256 encoded versions: The first source operand is a YMM register. The second source operand is a YMM register or a 256-bit memory location. The destination operand is a YMM register (conditionally updated with writemask k1 in case of EVEX). The upper bits (MAXVL-1:256) of the corresponding ZMM register destination are zeroed.
VEX.128 and EVEX.128 encoded versions: The first source operand is an XMM register. The second source operand is an XMM register or 128-bit memory location. The destination operand is an XMM register (conditionally updated with writemask k1 in case of EVEX). The upper bits (MAXVL-1:128) of the corresponding ZMM register destination are zeroed.
128-bit Legacy SSE version: The second source can be an XMM register or an 128-bit memory location. The destination is not distinct from the first source XMM register and the upper bits (MAXVL-1:128) of the corresponding register destination are unmodified.
", + "tooltip": "Performs a bitwise logical XOR of the two, four or eight packed double-precision floating-point values from the first source operand and the second source operand, and stores the result in the destination operand.", "url": "http://www.felixcloutier.com/x86/XORPD.html" }; diff --git a/lib/asm-docs/generated/asm-docs-arm32.js b/lib/asm-docs/generated/asm-docs-arm32.ts similarity index 98% rename from lib/asm-docs/generated/asm-docs-arm32.js rename to lib/asm-docs/generated/asm-docs-arm32.ts index 0a7b4b286..d06aef2c0 100644 --- a/lib/asm-docs/generated/asm-docs-arm32.js +++ b/lib/asm-docs/generated/asm-docs-arm32.ts @@ -1,4 +1,6 @@ -export function getAsmOpcode(opcode) { +import {AssemblyInstructionInfo} from '../base'; + +export function getAsmOpcode(opcode: string | undefined): AssemblyInstructionInfo | undefined { if (!opcode) return; switch (opcode) { case "ADC": @@ -2661,7 +2663,7 @@ export function getAsmOpcode(opcode) { case "VEXT": return { "tooltip": "Vector Extract extracts elements from the bottom end of the second operand vector and the top end of the first, concatenates them and places the result in the destination vector.", - "html": "Vector Extract extracts elements from the bottom end of the second operand vector and the top end of the first, concatenates them and places the result in the destination vector.
The elements of the vectors are treated as being 8-bit fields. There is no distinction between data types.
Depending on settings in the
Vector Extract extracts elements from the bottom end of the second operand vector and the top end of the first, concatenates them and places the result in the destination vector.
The elements of the vectors are treated as being 8-bit fields. There is no distinction between data types.
Depending on settings in the
Vector Pairwise Add and Accumulate Long adds adjacent pairs of elements of a vector, and accumulates the results into the elements of the destination vector.
The vectors can be doubleword or quadword. The operand elements can be 8-bit, 16-bit, or 32-bit integers. The result elements are twice the length of the operand elements.
Depending on settings in the
Vector Pairwise Add and Accumulate Long adds adjacent pairs of elements of a vector, and accumulates the results into the elements of the destination vector.
The vectors can be doubleword or quadword. The operand elements can be 8-bit, 16-bit, or 32-bit integers. The result elements are twice the length of the operand elements.
Depending on settings in the
Vector Pairwise Add (integer) adds adjacent pairs of elements of two vectors, and places the results in the destination vector.
The operands and result are doubleword vectors.
The operand and result elements must all be the same type, and can be 8-bit, 16-bit, or 32-bit integers. There is no distinction between signed and unsigned integers.
Depending on settings in the
Vector Pairwise Add (integer) adds adjacent pairs of elements of two vectors, and places the results in the destination vector.
The operands and result are doubleword vectors.
The operand and result elements must all be the same type, and can be 8-bit, 16-bit, or 32-bit integers. There is no distinction between signed and unsigned integers.
Depending on settings in the
Vector Pairwise Add Long adds adjacent pairs of elements of two vectors, and places the results in the destination vector.
The vectors can be doubleword or quadword. The operand elements can be 8-bit, 16-bit, or 32-bit integers. The result elements are twice the length of the operand elements.
Depending on settings in the
Vector Pairwise Add Long adds adjacent pairs of elements of two vectors, and places the results in the destination vector.
The vectors can be doubleword or quadword. The operand elements can be 8-bit, 16-bit, or 32-bit integers. The result elements are twice the length of the operand elements.
Depending on settings in the
Vector Pairwise Maximum compares adjacent pairs of elements in two doubleword vectors, and copies the larger of each pair into the corresponding element in the destination doubleword vector.
Depending on settings in the
Vector Pairwise Maximum compares adjacent pairs of elements in two doubleword vectors, and copies the larger of each pair into the corresponding element in the destination doubleword vector.
Depending on settings in the
Vector Reverse in halfwords reverses the order of 8-bit elements in each halfword of the vector, and places the result in the corresponding destination vector.
There is no distinction between data types, other than size.
Depending on settings in the
Vector Reverse in halfwords reverses the order of 8-bit elements in each halfword of the vector, and places the result in the corresponding destination vector.
There is no distinction between data types, other than size.
Depending on settings in the
Vector Reverse in words reverses the order of 8-bit or 16-bit elements in each word of the vector, and places the result in the corresponding destination vector.
There is no distinction between data types, other than size.
Depending on settings in the
Vector Reverse in words reverses the order of 8-bit or 16-bit elements in each word of the vector, and places the result in the corresponding destination vector.
There is no distinction between data types, other than size.
Depending on settings in the
Vector Reverse in doublewords reverses the order of 8-bit, 16-bit, or 32-bit elements in each doubleword of the vector, and places the result in the corresponding destination vector.
There is no distinction between data types, other than size.
Depending on settings in the
Vector Reverse in doublewords reverses the order of 8-bit, 16-bit, or 32-bit elements in each doubleword of the vector, and places the result in the corresponding destination vector.
There is no distinction between data types, other than size.
Depending on settings in the
Vector Transpose treats the elements of its operand vectors as elements of 2 x 2 matrices, and transposes the matrices.
The elements of the vectors can be 8-bit, 16-bit, or 32-bit. There is no distinction between data types.
Depending on settings in the
Vector Transpose treats the elements of its operand vectors as elements of 2 x 2 matrices, and transposes the matrices.
The elements of the vectors can be 8-bit, 16-bit, or 32-bit. There is no distinction between data types.
Depending on settings in the
Vector Unzip de-interleaves the elements of two vectors.
The elements of the vectors can be 8-bit, 16-bit, or 32-bit. There is no distinction between data types.
Depending on settings in the
Vector Unzip de-interleaves the elements of two vectors.
The elements of the vectors can be 8-bit, 16-bit, or 32-bit. There is no distinction between data types.
Depending on settings in the
Vector Zip interleaves the elements of two vectors.
The elements of the vectors can be 8-bit, 16-bit, or 32-bit. There is no distinction between data types.
Depending on settings in the
Vector Zip interleaves the elements of two vectors.
The elements of the vectors can be 8-bit, 16-bit, or 32-bit. There is no distinction between data types.
Depending on settings in the
Instruction d2f: Convert double to float
Format: d2f
Operand Stack: ..., value → ..., result
The value on the top of the operand stack must be of type double. It is popped from the operand stack and undergoes value set conversion (§2.8.3) resulting in value'. Then value' is converted to a float result using the round to nearest rounding policy (§2.8). The result is pushed onto the operand stack.
Instruction d2f: Convert double to float
Format: d2f
Operand Stack: ..., value → ..., result
The value on the top of the operand stack must be of type double. It is popped from the operand stack and converted to a float result using the round to nearest rounding policy (§2.8). The result is pushed onto the operand stack.
Instruction d2i: Convert double to int
Format: d2i
Operand Stack: ..., value → ..., result
The value on the top of the operand stack must be of type double. It is popped from the operand stack and undergoes value set conversion (§2.8.3) resulting in value'. Then value' is converted to an int result. The result is pushed onto the operand stack:
Instruction d2i: Convert double to int
Format: d2i
Operand Stack: ..., value → ..., result
The value on the top of the operand stack must be of type double. It is popped from the operand stack and converted to an int result. The result is pushed onto the operand stack:
Instruction d2l: Convert double to long
Format: d2l
Operand Stack: ..., value → ..., result
The value on the top of the operand stack must be of type double. It is popped from the operand stack and undergoes value set conversion (§2.8.3) resulting in value'. Then value' is converted to a long. The result is pushed onto the operand stack:
Instruction d2l: Convert double to long
Format: d2l
Operand Stack: ..., value → ..., result
The value on the top of the operand stack must be of type double. It is popped from the operand stack and converted to a long. The result is pushed onto the operand stack:
Instruction dadd: Add double
Format: dadd
Operand Stack: ..., value1, value2 → ..., result
Both value1 and value2 must be of type double. The values are popped from the operand stack and undergo value set conversion (§2.8.3), resulting in value1' and value2'. The double result is value1' + value2'. The result is pushed onto the operand stack.
Instruction dadd: Add double
Format: dadd
Operand Stack: ..., value1, value2 → ..., result
Both value1 and value2 must be of type double. The values are popped from the operand stack. The double result is value1 + value2. The result is pushed onto the operand stack.
Instruction dastore: Store into double array
Format: dastore
Operand Stack: ..., arrayref, index, value → ...
The arrayref must be of type reference and must refer to an array whose components are of type double. The index must be of type int, and value must be of type double. The arrayref, index, and value are popped from the operand stack. The double value undergoes value set conversion (§2.8.3), resulting in value', which is stored as the component of the array indexed by index.
Instruction dastore: Store into double array
Format: dastore
Operand Stack: ..., arrayref, index, value → ...
The arrayref must be of type reference and must refer to an array whose components are of type double. The index must be of type int, and value must be of type double. The arrayref, index, and value are popped from the operand stack. The double value is stored as the component of the array indexed by index.
Instruction dcmpg: Compare double
Format: dcmp[op]
Operand Stack: ..., value1, value2 → ..., result
Both value1 and value2 must be of type double. The values are popped from the operand stack and undergo value set conversion (§2.8.3), resulting in value1' and value2'. A floating-point comparison is performed:
Instruction dcmpg: Compare double
Format: dcmp[op]
Operand Stack: ..., value1, value2 → ..., result
Both value1 and value2 must be of type double. The values are popped from the operand stack and a floating-point comparison is performed:
Instruction dcmpl: Compare double
Format: dcmp[op]
Operand Stack: ..., value1, value2 → ..., result
Both value1 and value2 must be of type double. The values are popped from the operand stack and undergo value set conversion (§2.8.3), resulting in value1' and value2'. A floating-point comparison is performed:
Instruction dcmpl: Compare double
Format: dcmp[op]
Operand Stack: ..., value1, value2 → ..., result
Both value1 and value2 must be of type double. The values are popped from the operand stack and a floating-point comparison is performed:
Instruction ddiv: Divide double
Format: ddiv
Operand Stack: ..., value1, value2 → ..., result
Both value1 and value2 must be of type double. The values are popped from the operand stack and undergo value set conversion (§2.8.3), resulting in value1' and value2'. The double result is value1' / value2'. The result is pushed onto the operand stack.
Instruction ddiv: Divide double
Format: ddiv
Operand Stack: ..., value1, value2 → ..., result
Both value1 and value2 must be of type double. The values are popped from the operand stack. The double result is value1 / value2. The result is pushed onto the operand stack.
Instruction dmul: Multiply double
Format: dmul
Operand Stack: ..., value1, value2 → ..., result
Both value1 and value2 must be of type double. The values are popped from the operand stack and undergo value set conversion (§2.8.3), resulting in value1' and value2'. The double result is value1' * value2'. The result is pushed onto the operand stack.
Instruction dmul: Multiply double
Format: dmul
Operand Stack: ..., value1, value2 → ..., result
Both value1 and value2 must be of type double. The values are popped from the operand stack. The double result is value1 * value2. The result is pushed onto the operand stack.
Instruction dneg: Negate double
Format: dneg
Operand Stack: ..., value → ..., result
The value must be of type double. It is popped from the operand stack and undergoes value set conversion (§2.8.3), resulting in value'. The double result is the arithmetic negation of value'. The result is pushed onto the operand stack.
Instruction dneg: Negate double
Format: dneg
Operand Stack: ..., value → ..., result
The value must be of type double. It is popped from the operand stack. The double result is the arithmetic negation of value. The result is pushed onto the operand stack.
Instruction drem: Remainder double
Format: drem
Operand Stack: ..., value1, value2 → ..., result
Both value1 and value2 must be of type double. The values are popped from the operand stack and undergo value set conversion (§2.8.3), resulting in value1' and value2'. The double result is calculated and pushed onto the operand stack.
Instruction drem: Remainder double
Format: drem
Operand Stack: ..., value1, value2 → ..., result
Both value1 and value2 must be of type double. The values are popped from the operand stack. The double result is calculated and pushed onto the operand stack.
Instruction dreturn: Return double from method
Format: dreturn
Operand Stack: ..., value → [empty]
The current method must have return type double. The value must be of type double. If the current method is a synchronized method, the monitor entered or reentered on invocation of the method is updated and possibly exited as if by execution of a monitorexit instruction (§monitorexit) in the current thread. If no exception is thrown, value is popped from the operand stack of the current frame (§2.6) and undergoes value set conversion (§2.8.3), resulting in value'. The value' is pushed onto the operand stack of the frame of the invoker. Any other values on the operand stack of the current method are discarded.
Instruction dreturn: Return double from method
Format: dreturn
Operand Stack: ..., value → [empty]
The current method must have return type double. The value must be of type double. If the current method is a synchronized method, the monitor entered or reentered on invocation of the method is updated and possibly exited as if by execution of a monitorexit instruction (§monitorexit) in the current thread. If no exception is thrown, value is popped from the operand stack of the current frame (§2.6) and pushed onto the operand stack of the frame of the invoker. Any other values on the operand stack of the current method are discarded.
Instruction dstore: Store double into local variable
Format: dstore index
Operand Stack: ..., value → ...
The index is an unsigned byte. Both index and index+1 must be indices into the local variable array of the current frame (§2.6). The value on the top of the operand stack must be of type double. It is popped from the operand stack and undergoes value set conversion (§2.8.3), resulting in value'. The local variables at index and index+1 are set to value'.
Instruction dstore: Store double into local variable
Format: dstore index
Operand Stack: ..., value → ...
The index is an unsigned byte. Both index and index+1 must be indices into the local variable array of the current frame (§2.6). The value on the top of the operand stack must be of type double. It is popped from the operand stack. The local variables at index and index+1 are set to value.
Instruction dstore_0: Store double into local variable
Format: dstore_[n]
Operand Stack: ..., value → ...
Both <n> and <n>+1 must be indices into the local variable array of the current frame (§2.6). The value on the top of the operand stack must be of type double. It is popped from the operand stack and undergoes value set conversion (§2.8.3), resulting in value'. The local variables at <n> and <n>+1 are set to value'.
Instruction dstore_0: Store double into local variable
Format: dstore_[n]
Operand Stack: ..., value → ...
Both <n> and <n>+1 must be indices into the local variable array of the current frame (§2.6). The value on the top of the operand stack must be of type double. It is popped from the operand stack. The local variables at <n> and <n>+1 are set to value.
Instruction dstore_1: Store double into local variable
Format: dstore_[n]
Operand Stack: ..., value → ...
Both <n> and <n>+1 must be indices into the local variable array of the current frame (§2.6). The value on the top of the operand stack must be of type double. It is popped from the operand stack and undergoes value set conversion (§2.8.3), resulting in value'. The local variables at <n> and <n>+1 are set to value'.
Instruction dstore_1: Store double into local variable
Format: dstore_[n]
Operand Stack: ..., value → ...
Both <n> and <n>+1 must be indices into the local variable array of the current frame (§2.6). The value on the top of the operand stack must be of type double. It is popped from the operand stack. The local variables at <n> and <n>+1 are set to value.
Instruction dstore_2: Store double into local variable
Format: dstore_[n]
Operand Stack: ..., value → ...
Both <n> and <n>+1 must be indices into the local variable array of the current frame (§2.6). The value on the top of the operand stack must be of type double. It is popped from the operand stack and undergoes value set conversion (§2.8.3), resulting in value'. The local variables at <n> and <n>+1 are set to value'.
Instruction dstore_2: Store double into local variable
Format: dstore_[n]
Operand Stack: ..., value → ...
Both <n> and <n>+1 must be indices into the local variable array of the current frame (§2.6). The value on the top of the operand stack must be of type double. It is popped from the operand stack. The local variables at <n> and <n>+1 are set to value.
Instruction dstore_3: Store double into local variable
Format: dstore_[n]
Operand Stack: ..., value → ...
Both <n> and <n>+1 must be indices into the local variable array of the current frame (§2.6). The value on the top of the operand stack must be of type double. It is popped from the operand stack and undergoes value set conversion (§2.8.3), resulting in value'. The local variables at <n> and <n>+1 are set to value'.
Instruction dstore_3: Store double into local variable
Format: dstore_[n]
Operand Stack: ..., value → ...
Both <n> and <n>+1 must be indices into the local variable array of the current frame (§2.6). The value on the top of the operand stack must be of type double. It is popped from the operand stack. The local variables at <n> and <n>+1 are set to value.
Instruction dsub: Subtract double
Format: dsub
Operand Stack: ..., value1, value2 → ..., result
Both value1 and value2 must be of type double. The values are popped from the operand stack and undergo value set conversion (§2.8.3), resulting in value1' and value2'. The double result is value1' - value2'. The result is pushed onto the operand stack.
Instruction dsub: Subtract double
Format: dsub
Operand Stack: ..., value1, value2 → ..., result
Both value1 and value2 must be of type double. The values are popped from the operand stack. The double result is value1 - value2. The result is pushed onto the operand stack.
Instruction f2d: Convert float to double
Format: f2d
Operand Stack: ..., value → ..., result
The value on the top of the operand stack must be of type float. It is popped from the operand stack and undergoes value set conversion (§2.8.3), resulting in value'. Then value' is converted to a double result. This result is pushed onto the operand stack.
Instruction f2d: Convert float to double
Format: f2d
Operand Stack: ..., value → ..., result
The value on the top of the operand stack must be of type float. It is popped from the operand stack and converted to a double result. The result is pushed onto the operand stack.
Instruction f2i: Convert float to int
Format: f2i
Operand Stack: ..., value → ..., result
The value on the top of the operand stack must be of type float. It is popped from the operand stack and undergoes value set conversion (§2.8.3), resulting in value'. Then value' is converted to an int result. This result is pushed onto the operand stack:
Instruction f2i: Convert float to int
Format: f2i
Operand Stack: ..., value → ..., result
The value on the top of the operand stack must be of type float. It is popped from the operand stack and converted to an int result. The result is pushed onto the operand stack:
Instruction f2l: Convert float to long
Format: f2l
Operand Stack: ..., value → ..., result
The value on the top of the operand stack must be of type float. It is popped from the operand stack and undergoes value set conversion (§2.8.3), resulting in value'. Then value' is converted to a long result. This result is pushed onto the operand stack:
Instruction f2l: Convert float to long
Format: f2l
Operand Stack: ..., value → ..., result
The value on the top of the operand stack must be of type float. It is popped from the operand stack and converted to a long result. The result is pushed onto the operand stack:
Instruction fadd: Add float
Format: fadd
Operand Stack: ..., value1, value2 → ..., result
Both value1 and value2 must be of type float. The values are popped from the operand stack and undergo value set conversion (§2.8.3), resulting in value1' and value2'. The float result is value1' + value2'. The result is pushed onto the operand stack.
Instruction fadd: Add float
Format: fadd
Operand Stack: ..., value1, value2 → ..., result
Both value1 and value2 must be of type float. The values are popped from the operand stack. The float result is value1 + value2. The result is pushed onto the operand stack.
Instruction fastore: Store into float array
Format: fastore
Operand Stack: ..., arrayref, index, value → ...
The arrayref must be of type reference and must refer to an array whose components are of type float. The index must be of type int, and the value must be of type float. The arrayref, index, and value are popped from the operand stack. The float value undergoes value set conversion (§2.8.3), resulting in value', and value' is stored as the component of the array indexed by index.
Instruction fastore: Store into float array
Format: fastore
Operand Stack: ..., arrayref, index, value → ...
The arrayref must be of type reference and must refer to an array whose components are of type float. The index must be of type int, and the value must be of type float. The arrayref, index, and value are popped from the operand stack. The float value is stored as the component of the array indexed by index.
Instruction fcmpg: Compare float
Format: fcmp[op]
Operand Stack: ..., value1, value2 → ..., result
Both value1 and value2 must be of type float. The values are popped from the operand stack and undergo value set conversion (§2.8.3), resulting in value1' and value2'. A floating-point comparison is performed:
Instruction fcmpg: Compare float
Format: fcmp[op]
Operand Stack: ..., value1, value2 → ..., result
Both value1 and value2 must be of type float. The values are popped from the operand stack and a floating-point comparison is performed:
Instruction fcmpl: Compare float
Format: fcmp[op]
Operand Stack: ..., value1, value2 → ..., result
Both value1 and value2 must be of type float. The values are popped from the operand stack and undergo value set conversion (§2.8.3), resulting in value1' and value2'. A floating-point comparison is performed:
Instruction fcmpl: Compare float
Format: fcmp[op]
Operand Stack: ..., value1, value2 → ..., result
Both value1 and value2 must be of type float. The values are popped from the operand stack and a floating-point comparison is performed:
Instruction fdiv: Divide float
Format: fdiv
Operand Stack: ..., value1, value2 → ..., result
Both value1 and value2 must be of type float. The values are popped from the operand stack and undergo value set conversion (§2.8.3), resulting in value1' and value2'. The float result is value1' / value2'. The result is pushed onto the operand stack.
Instruction fdiv: Divide float
Format: fdiv
Operand Stack: ..., value1, value2 → ..., result
Both value1 and value2 must be of type float. The values are popped from the operand stack. The float result is value1 / value2. The result is pushed onto the operand stack.
Instruction fmul: Multiply float
Format: fmul
Operand Stack: ..., value1, value2 → ..., result
Both value1 and value2 must be of type float. The values are popped from the operand stack and undergo value set conversion (§2.8.3), resulting in value1' and value2'. The float result is value1' * value2'. The result is pushed onto the operand stack.
Instruction fmul: Multiply float
Format: fmul
Operand Stack: ..., value1, value2 → ..., result
Both value1 and value2 must be of type float. The values are popped from the operand stack. The float result is value1 * value2. The result is pushed onto the operand stack.
Instruction fneg: Negate float
Format: fneg
Operand Stack: ..., value → ..., result
The value must be of type float. It is popped from the operand stack and undergoes value set conversion (§2.8.3), resulting in value'. The float result is the arithmetic negation of value'. This result is pushed onto the operand stack.
Instruction fneg: Negate float
Format: fneg
Operand Stack: ..., value → ..., result
The value must be of type float. It is popped from the operand stack. The float result is the arithmetic negation of value. The result is pushed onto the operand stack.
Instruction frem: Remainder float
Format: frem
Operand Stack: ..., value1, value2 → ..., result
Both value1 and value2 must be of type float. The values are popped from the operand stack and undergo value set conversion (§2.8.3), resulting in value1' and value2'. The float result is calculated and pushed onto the operand stack.
Instruction frem: Remainder float
Format: frem
Operand Stack: ..., value1, value2 → ..., result
Both value1 and value2 must be of type float. The values are popped from the operand stack. The float result is calculated and pushed onto the operand stack.
Instruction freturn: Return float from method
Format: freturn
Operand Stack: ..., value → [empty]
The current method must have return type float. The value must be of type float. If the current method is a synchronized method, the monitor entered or reentered on invocation of the method is updated and possibly exited as if by execution of a monitorexit instruction (§monitorexit) in the current thread. If no exception is thrown, value is popped from the operand stack of the current frame (§2.6) and undergoes value set conversion (§2.8.3), resulting in value'. The value' is pushed onto the operand stack of the frame of the invoker. Any other values on the operand stack of the current method are discarded.
Instruction freturn: Return float from method
Format: freturn
Operand Stack: ..., value → [empty]
The current method must have return type float. The value must be of type float. If the current method is a synchronized method, the monitor entered or reentered on invocation of the method is updated and possibly exited as if by execution of a monitorexit instruction (§monitorexit) in the current thread. If no exception is thrown, value is popped from the operand stack of the current frame (§2.6) and pushed onto the operand stack of the frame of the invoker. Any other values on the operand stack of the current method are discarded.
Instruction fstore: Store float into local variable
Format: fstore index
Operand Stack: ..., value → ...
The index is an unsigned byte that must be an index into the local variable array of the current frame (§2.6). The value on the top of the operand stack must be of type float. It is popped from the operand stack and undergoes value set conversion (§2.8.3), resulting in value'. The value of the local variable at index is set to value'.
Instruction fstore: Store float into local variable
Format: fstore index
Operand Stack: ..., value → ...
The index is an unsigned byte that must be an index into the local variable array of the current frame (§2.6). The value on the top of the operand stack must be of type float. It is popped from the operand stack, and the value of the local variable at index is set to value.
Instruction fstore_0: Store float into local variable
Format: fstore_[n]
Operand Stack: ..., value → ...
The <n> must be an index into the local variable array of the current frame (§2.6). The value on the top of the operand stack must be of type float. It is popped from the operand stack and undergoes value set conversion (§2.8.3), resulting in value'. The value of the local variable at <n> is set to value'.
Instruction fstore_0: Store float into local variable
Format: fstore_[n]
Operand Stack: ..., value → ...
The <n> must be an index into the local variable array of the current frame (§2.6). The value on the top of the operand stack must be of type float. It is popped from the operand stack, and the value of the local variable at <n> is set to value.
Instruction fstore_1: Store float into local variable
Format: fstore_[n]
Operand Stack: ..., value → ...
The <n> must be an index into the local variable array of the current frame (§2.6). The value on the top of the operand stack must be of type float. It is popped from the operand stack and undergoes value set conversion (§2.8.3), resulting in value'. The value of the local variable at <n> is set to value'.
Instruction fstore_1: Store float into local variable
Format: fstore_[n]
Operand Stack: ..., value → ...
The <n> must be an index into the local variable array of the current frame (§2.6). The value on the top of the operand stack must be of type float. It is popped from the operand stack, and the value of the local variable at <n> is set to value.
Instruction fstore_2: Store float into local variable
Format: fstore_[n]
Operand Stack: ..., value → ...
The <n> must be an index into the local variable array of the current frame (§2.6). The value on the top of the operand stack must be of type float. It is popped from the operand stack and undergoes value set conversion (§2.8.3), resulting in value'. The value of the local variable at <n> is set to value'.
Instruction fstore_2: Store float into local variable
Format: fstore_[n]
Operand Stack: ..., value → ...
The <n> must be an index into the local variable array of the current frame (§2.6). The value on the top of the operand stack must be of type float. It is popped from the operand stack, and the value of the local variable at <n> is set to value.
Instruction fstore_3: Store float into local variable
Format: fstore_[n]
Operand Stack: ..., value → ...
The <n> must be an index into the local variable array of the current frame (§2.6). The value on the top of the operand stack must be of type float. It is popped from the operand stack and undergoes value set conversion (§2.8.3), resulting in value'. The value of the local variable at <n> is set to value'.
Instruction fstore_3: Store float into local variable
Format: fstore_[n]
Operand Stack: ..., value → ...
The <n> must be an index into the local variable array of the current frame (§2.6). The value on the top of the operand stack must be of type float. It is popped from the operand stack, and the value of the local variable at <n> is set to value.
Instruction fsub: Subtract float
Format: fsub
Operand Stack: ..., value1, value2 → ..., result
Both value1 and value2 must be of type float. The values are popped from the operand stack and undergo value set conversion (§2.8.3), resulting in value1' and value2'. The float result is value1' - value2'. The result is pushed onto the operand stack.
Instruction fsub: Subtract float
Format: fsub
Operand Stack: ..., value1, value2 → ..., result
Both value1 and value2 must be of type float. The values are popped from the operand stack. The float result is value1 - value2. The result is pushed onto the operand stack.
Instruction i2f: Convert int to float
Format: i2f
Operand Stack: ..., value → ..., result
The value on the top of the operand stack must be of type int. It is popped from the operand stack and converted to the float result using the round to nearest rounding policy (§2.8). The result is pushed onto the operand stack.
Instruction i2f: Convert int to float
Format: i2f
Operand Stack: ..., value → ..., result
The value on the top of the operand stack must be of type int. It is popped from the operand stack and converted to a float result using the round to nearest rounding policy (§2.8). The result is pushed onto the operand stack.
ret’ Instruction¶ret <type> <value> ; Return a value from a non-void function
-ret void ; Return from void function
-The ‘ret’ instruction is used to return control flow (and optionally
-a value) from a function back to the caller.
There are two forms of the ‘ret’ instruction: one that returns a
-value and then causes control flow, and one that just causes control
-flow to occur.
The ‘ret’ instruction optionally accepts a single argument, the
-return value. The type of the return value must be a ‘first
-class’ type.
A function is not well formed if it has a non-void
-return type and contains a ‘ret’ instruction with no return value or
-a return value with a type that does not match its type, or if it has a
-void return type and contains a ‘ret’ instruction with a return
-value.
When the ‘ret’ instruction is executed, control flow returns back to
-the calling function’s context. If the caller is a
-“call” instruction, execution continues at the
-instruction after the call. If the caller was an
-“invoke” instruction, execution continues at the
-beginning of the “normal” destination block. If the instruction returns
-a value, that value shall set the call or invoke instruction’s return
-value.
ret i32 5 ; Return an integer value of 5
-ret void ; Return from a void function
-ret { i32, i8 } { i32 4, i8 2 } ; Return a struct of values 4 and 2
-br’ Instruction¶br i1 <cond>, label <iftrue>, label <iffalse>
-br label <dest> ; Unconditional branch
-The ‘br’ instruction is used to cause control flow to transfer to a
-different basic block in the current function. There are two forms of
-this instruction, corresponding to a conditional branch and an
-unconditional branch.
The conditional branch form of the ‘br’ instruction takes a single
-‘i1’ value and two ‘label’ values. The unconditional form of the
-‘br’ instruction takes a single ‘label’ value as a target.
Upon execution of a conditional ‘br’ instruction, the ‘i1’
-argument is evaluated. If the value is true, control flows to the
-‘iftrue’ label argument. If “cond” is false, control flows
-to the ‘iffalse’ label argument.
-If ‘cond’ is poison or undef, this instruction has undefined
-behavior.
Test:
- %cond = icmp eq i32 %a, %b
- br i1 %cond, label %IfEqual, label %IfUnequal
-IfEqual:
- ret i32 1
-IfUnequal:
- ret i32 0
-switch’ Instruction¶The ‘switch’ instruction is used to transfer control flow to one of
-several different places. It is a generalization of the ‘br’
-instruction, allowing a branch to occur to one of many possible
-destinations.
The ‘switch’ instruction uses three parameters: an integer
-comparison value ‘value’, a default ‘label’ destination, and an
-array of pairs of comparison value constants and ‘label’s. The table
-is not allowed to contain duplicate constant entries.
The switch instruction specifies a table of values and destinations.
-When the ‘switch’ instruction is executed, this table is searched
-for the given value. If the value is found, control flow is transferred
-to the corresponding destination; otherwise, control flow is transferred
-to the default destination.
-If ‘value’ is poison or undef, this instruction has undefined
-behavior.
Depending on properties of the target machine and the particular
-switch instruction, this instruction may be code generated in
-different ways. For example, it could be generated as a series of
-chained conditional branches or with a lookup table.
; Emulate a conditional br instruction
-%Val = zext i1 %value to i32
-switch i32 %Val, label %truedest [ i32 0, label %falsedest ]
-
-; Emulate an unconditional br instruction
-switch i32 0, label %dest [ ]
-
-; Implement a jump table:
-switch i32 %val, label %otherwise [ i32 0, label %onzero
- i32 1, label %onone
- i32 2, label %ontwo ]
-indirectbr’ Instruction¶The ‘indirectbr’ instruction implements an indirect branch to a
-label within the current function, whose address is specified by
-“address”. Address must be derived from a
-blockaddress constant.
The ‘address’ argument is the address of the label to jump to. The
-rest of the arguments indicate the full set of possible destinations
-that the address may point to. Blocks are allowed to occur multiple
-times in the destination list, though this isn’t particularly useful.
This destination list is required so that dataflow analysis has an -accurate understanding of the CFG.
-Control transfers to the block specified in the address argument. All
-possible destination blocks must be listed in the label list, otherwise
-this instruction has undefined behavior. This implies that jumps to
-labels defined in other functions have undefined behavior as well.
-If ‘address’ is poison or undef, this instruction has undefined
-behavior.
This is typically implemented with a jump through a register.
-invoke’ Instruction¶<result> = invoke [cconv] [ret attrs] [addrspace(<num>)] <ty>|<fnty> <fnptrval>(<function args>) [fn attrs]
- [operand bundles] to label <normal label> unwind label <exception label>
-The ‘invoke’ instruction causes control to transfer to a specified
-function, with the possibility of control flow transfer to either the
-‘normal’ label or the ‘exception’ label. If the callee function
-returns with the “ret” instruction, control flow will return to the
-“normal” label. If the callee (or any indirect callees) returns via the
-“resume” instruction or other exception handling
-mechanism, control is interrupted and continued at the dynamically
-nearest “exception” label.
The ‘exception’ label is a landing
-pad for the exception. As such,
-‘exception’ label is required to have the
-“landingpad” instruction, which contains the
-information about the behavior of the program after unwinding happens,
-as its first non-PHI instruction. The restrictions on the
-“landingpad” instruction tightly couple it to the “invoke”
-instruction, so that the important information contained within the
-“landingpad” instruction can’t be lost through normal code motion.
This instruction requires several arguments:
-zeroext’, ‘signext’, and ‘inreg’ attributes
-are valid here.ty’: the type of the call instruction itself which is also the
-type of the return value. Functions that return no value are marked
-void.fnty’: shall be the signature of the function being invoked. The
-argument types must match the types implied by this signature. This
-type can be omitted if the function is not varargs.fnptrval’: An LLVM value containing a pointer to a function to
-be invoked. In most cases, this is a direct function invocation, but
-indirect invoke’s are just as possible, calling an arbitrary pointer
-to function value.function args’: argument list whose types match the function
-signature argument types and parameter attributes. All arguments must
-be of first class type. If the function signature
-indicates the function accepts a variable number of arguments, the
-extra arguments can be specified.normal label’: the label reached when the called function
-executes a ‘ret’ instruction.exception label’: the label reached when a callee returns via
-the resume instruction or other exception handling
-mechanism.This instruction is designed to operate as a standard ‘call’
-instruction in most regards. The primary difference is that it
-establishes an association with a label, which is used by the runtime
-library to unwind the stack.
This instruction is used in languages with destructors to ensure that
-proper cleanup is performed in the case of either a longjmp or a
-thrown exception. Additionally, this is important for implementation of
-‘catch’ clauses in high-level languages that support them.
For the purposes of the SSA form, the definition of the value returned
-by the ‘invoke’ instruction is deemed to occur on the edge from the
-current block to the “normal” label. If the callee unwinds then no
-return value is available.
%retval = invoke i32 @Test(i32 15) to label %Continue
- unwind label %TestCleanup ; i32:retval set
-%retval = invoke coldcc i32 %Testfnptr(i32 15) to label %Continue
- unwind label %TestCleanup ; i32:retval set
-callbr’ Instruction¶<result> = callbr [cconv] [ret attrs] [addrspace(<num>)] <ty>|<fnty> <fnptrval>(<function args>) [fn attrs]
- [operand bundles] to label <fallthrough label> [indirect labels]
-The ‘callbr’ instruction causes control to transfer to a specified
-function, with the possibility of control flow transfer to either the
-‘fallthrough’ label or one of the ‘indirect’ labels.
This instruction should only be used to implement the “goto” feature of gcc -style inline assembly. Any other usage is an error in the IR verifier.
-This instruction requires several arguments:
-zeroext’, ‘signext’, and ‘inreg’ attributes
-are valid here.ty’: the type of the call instruction itself which is also the
-type of the return value. Functions that return no value are marked
-void.fnty’: shall be the signature of the function being called. The
-argument types must match the types implied by this signature. This
-type can be omitted if the function is not varargs.fnptrval’: An LLVM value containing a pointer to a function to
-be called. In most cases, this is a direct function call, but
-other callbr’s are just as possible, calling an arbitrary pointer
-to function value.function args’: argument list whose types match the function
-signature argument types and parameter attributes. All arguments must
-be of first class type. If the function signature
-indicates the function accepts a variable number of arguments, the
-extra arguments can be specified.fallthrough label’: the label reached when the inline assembly’s
-execution exits the bottom.indirect labels’: the labels reached when a callee transfers control
-to a location other than the ‘fallthrough label’. The blockaddress
-constant for these should also be in the list of ‘function args’.This instruction is designed to operate as a standard ‘call’
-instruction in most regards. The primary difference is that it
-establishes an association with additional labels to define where control
-flow goes after the call.
The output values of a ‘callbr’ instruction are available only to
-the ‘fallthrough’ block, not to any ‘indirect’ block(s).
The only use of this today is to implement the “goto” feature of gcc inline -assembly where additional labels can be provided as locations for the inline -assembly to jump to.
-; "asm goto" without output constraints.
-callbr void asm "", "r,X"(i32 %x, i8 *blockaddress(@foo, %indirect))
- to label %fallthrough [label %indirect]
-
-; "asm goto" with output constraints.
-<result> = callbr i32 asm "", "=r,r,X"(i32 %x, i8 *blockaddress(@foo, %indirect))
- to label %fallthrough [label %indirect]
-resume’ Instruction¶The ‘resume’ instruction requires one argument, which must have the
-same type as the result of any ‘landingpad’ instruction in the same
-function.
The ‘resume’ instruction resumes propagation of an existing
-(in-flight) exception whose unwinding was interrupted with a
-landingpad instruction.
catchswitch’ Instruction¶<resultval> = catchswitch within <parent> [ label <handler1>, label <handler2>, ... ] unwind to caller
-<resultval> = catchswitch within <parent> [ label <handler1>, label <handler2>, ... ] unwind label <default>
-The ‘catchswitch’ instruction is used by LLVM’s exception handling system to describe the set of possible catch handlers
-that may be executed by the EH personality routine.
The parent argument is the token of the funclet that contains the
-catchswitch instruction. If the catchswitch is not inside a funclet,
-this operand may be the token none.
The default argument is the label of another basic block beginning with
-either a cleanuppad or catchswitch instruction. This unwind destination
-must be a legal target with respect to the parent links, as described in
-the exception handling documentation.
The handlers are a nonempty list of successor blocks that each begin with a
-catchpad instruction.
Executing this instruction transfers control to one of the successors in
-handlers, if appropriate, or continues to unwind via the unwind label if
-present.
The catchswitch is both a terminator and a “pad” instruction, meaning that
-it must be both the first non-phi instruction and last instruction in the basic
-block. Therefore, it must be the only non-phi instruction in the block.
dispatch1:
- %cs1 = catchswitch within none [label %handler0, label %handler1] unwind to caller
-dispatch2:
- %cs2 = catchswitch within %parenthandler [label %handler0] unwind label %cleanup
-catchret’ Instruction¶The first argument to a ‘catchret’ indicates which catchpad it
-exits. It must be a catchpad.
-The second argument to a ‘catchret’ specifies where control will
-transfer to next.
The ‘catchret’ instruction ends an existing (in-flight) exception whose
-unwinding was interrupted with a catchpad instruction. The
-personality function gets a chance to execute arbitrary
-code to, for example, destroy the active exception. Control then transfers to
-normal.
The token argument must be a token produced by a catchpad instruction.
-If the specified catchpad is not the most-recently-entered not-yet-exited
-funclet pad (as described in the EH documentation),
-the catchret’s behavior is undefined.
cleanupret’ Instruction¶cleanupret from <value> unwind label <continue>
-cleanupret from <value> unwind to caller
-The ‘cleanupret’ instruction is a terminator instruction that has
-an optional successor.
The ‘cleanupret’ instruction requires one argument, which indicates
-which cleanuppad it exits, and must be a cleanuppad.
-If the specified cleanuppad is not the most-recently-entered not-yet-exited
-funclet pad (as described in the EH documentation),
-the cleanupret’s behavior is undefined.
The ‘cleanupret’ instruction also has an optional successor, continue,
-which must be the label of another basic block beginning with either a
-cleanuppad or catchswitch instruction. This unwind destination must
-be a legal target with respect to the parent links, as described in the
-exception handling documentation.
The ‘cleanupret’ instruction indicates to the
-personality function that one
-cleanuppad it transferred control to has ended.
-It transfers control to continue or unwinds out of the function.
cleanupret from %cleanup unwind to caller
-cleanupret from %cleanup unwind label %continue
-unreachable’ Instruction¶The ‘unreachable’ instruction has no defined semantics. This
-instruction is used to inform the optimizer that a particular portion of
-the code is not reachable. This can be used to indicate that the code
-after a no-return function cannot be reached, and other facts.
The ‘unreachable’ instruction has no defined semantics.
fneg’ Instruction¶The argument to the ‘fneg’ instruction must be a
-floating-point or vector of
-floating-point values.
The value produced is a copy of the operand with its sign bit flipped. -This instruction can also take any number of fast-math -flags, which are optimization hints to enable otherwise -unsafe floating-point optimizations:
-add’ Instruction¶<result> = add <ty> <op1>, <op2> ; yields ty:result
-<result> = add nuw <ty> <op1>, <op2> ; yields ty:result
-<result> = add nsw <ty> <op1>, <op2> ; yields ty:result
-<result> = add nuw nsw <ty> <op1>, <op2> ; yields ty:result
-The two arguments to the ‘add’ instruction must be
-integer or vector of integer values. Both
-arguments must have identical types.
The value produced is the integer sum of the two operands.
-If the sum has unsigned overflow, the result returned is the -mathematical result modulo $2^n$, where n is the bit width of -the result.
-Because LLVM integers use a two’s complement representation, this -instruction is appropriate for both signed and unsigned integers.
-nuw and nsw stand for “No Unsigned Wrap” and “No Signed Wrap”,
-respectively. If the nuw and/or nsw keywords are present, the
-result value of the add is a poison value if
-unsigned and/or signed overflow, respectively, occurs.
fadd’ Instruction¶The two arguments to the ‘fadd’ instruction must be
-floating-point or vector of
-floating-point values. Both arguments must have identical types.
The value produced is the floating-point sum of the two operands. -This instruction is assumed to execute in the default floating-point -environment. -This instruction can also take any number of fast-math -flags, which are optimization hints to enable otherwise -unsafe floating-point optimizations:
-sub’ Instruction¶<result> = sub <ty> <op1>, <op2> ; yields ty:result
-<result> = sub nuw <ty> <op1>, <op2> ; yields ty:result
-<result> = sub nsw <ty> <op1>, <op2> ; yields ty:result
-<result> = sub nuw nsw <ty> <op1>, <op2> ; yields ty:result
-The ‘sub’ instruction returns the difference of its two operands.
Note that the ‘sub’ instruction is used to represent the ‘neg’
-instruction present in most other intermediate representations.
The two arguments to the ‘sub’ instruction must be
-integer or vector of integer values. Both
-arguments must have identical types.
The value produced is the integer difference of the two operands.
-If the difference has unsigned overflow, the result returned is the -mathematical result modulo $2^n$, where n is the bit width of -the result.
-Because LLVM integers use a two’s complement representation, this -instruction is appropriate for both signed and unsigned integers.
-nuw and nsw stand for “No Unsigned Wrap” and “No Signed Wrap”,
-respectively. If the nuw and/or nsw keywords are present, the
-result value of the sub is a poison value if
-unsigned and/or signed overflow, respectively, occurs.
<result> = sub i32 4, %var ; yields i32:result = 4 - %var
-<result> = sub i32 0, %val ; yields i32:result = -%val
-fsub’ Instruction¶The two arguments to the ‘fsub’ instruction must be
-floating-point or vector of
-floating-point values. Both arguments must have identical types.
The value produced is the floating-point difference of the two operands. -This instruction is assumed to execute in the default floating-point -environment. -This instruction can also take any number of fast-math -flags, which are optimization hints to enable otherwise -unsafe floating-point optimizations:
-<result> = fsub float 4.0, %var ; yields float:result = 4.0 - %var
-<result> = fsub float -0.0, %val ; yields float:result = -%val
-mul’ Instruction¶<result> = mul <ty> <op1>, <op2> ; yields ty:result
-<result> = mul nuw <ty> <op1>, <op2> ; yields ty:result
-<result> = mul nsw <ty> <op1>, <op2> ; yields ty:result
-<result> = mul nuw nsw <ty> <op1>, <op2> ; yields ty:result
-The two arguments to the ‘mul’ instruction must be
-integer or vector of integer values. Both
-arguments must have identical types.
The value produced is the integer product of the two operands.
-If the result of the multiplication has unsigned overflow, the result -returned is the mathematical result modulo $2^n$, where n is the -bit width of the result.
-Because LLVM integers use a two’s complement representation, and the
-result is the same width as the operands, this instruction returns the
-correct result for both signed and unsigned integers. If a full product
-(e.g. i32 * i32 -> i64) is needed, the operands should be
-sign-extended or zero-extended as appropriate to the width of the full
-product.
nuw and nsw stand for “No Unsigned Wrap” and “No Signed Wrap”,
-respectively. If the nuw and/or nsw keywords are present, the
-result value of the mul is a poison value if
-unsigned and/or signed overflow, respectively, occurs.
fmul’ Instruction¶The two arguments to the ‘fmul’ instruction must be
-floating-point or vector of
-floating-point values. Both arguments must have identical types.
The value produced is the floating-point product of the two operands. -This instruction is assumed to execute in the default floating-point -environment. -This instruction can also take any number of fast-math -flags, which are optimization hints to enable otherwise -unsafe floating-point optimizations:
-udiv’ Instruction¶<result> = udiv <ty> <op1>, <op2> ; yields ty:result
-<result> = udiv exact <ty> <op1>, <op2> ; yields ty:result
-The two arguments to the ‘udiv’ instruction must be
-integer or vector of integer values. Both
-arguments must have identical types.
The value produced is the unsigned integer quotient of the two operands.
-Note that unsigned integer division and signed integer division are
-distinct operations; for signed integer division, use ‘sdiv’.
Division by zero is undefined behavior. For vectors, if any element -of the divisor is zero, the operation has undefined behavior.
-If the exact keyword is present, the result value of the udiv is
-a poison value if %op1 is not a multiple of %op2 (as
-such, “((a udiv exact b) mul b) == a”).
sdiv’ Instruction¶<result> = sdiv <ty> <op1>, <op2> ; yields ty:result
-<result> = sdiv exact <ty> <op1>, <op2> ; yields ty:result
-The two arguments to the ‘sdiv’ instruction must be
-integer or vector of integer values. Both
-arguments must have identical types.
The value produced is the signed integer quotient of the two operands -rounded towards zero.
-Note that signed integer division and unsigned integer division are
-distinct operations; for unsigned integer division, use ‘udiv’.
Division by zero is undefined behavior. For vectors, if any element -of the divisor is zero, the operation has undefined behavior. -Overflow also leads to undefined behavior; this is a rare case, but can -occur, for example, by doing a 32-bit division of -2147483648 by -1.
-If the exact keyword is present, the result value of the sdiv is
-a poison value if the result would be rounded.
fdiv’ Instruction¶The two arguments to the ‘fdiv’ instruction must be
-floating-point or vector of
-floating-point values. Both arguments must have identical types.
The value produced is the floating-point quotient of the two operands. -This instruction is assumed to execute in the default floating-point -environment. -This instruction can also take any number of fast-math -flags, which are optimization hints to enable otherwise -unsafe floating-point optimizations:
-urem’ Instruction¶The ‘urem’ instruction returns the remainder from the unsigned
-division of its two arguments.
The two arguments to the ‘urem’ instruction must be
-integer or vector of integer values. Both
-arguments must have identical types.
This instruction returns the unsigned integer remainder of a division. -This instruction always performs an unsigned division to get the -remainder.
-Note that unsigned integer remainder and signed integer remainder are
-distinct operations; for signed integer remainder, use ‘srem’.
Taking the remainder of a division by zero is undefined behavior. -For vectors, if any element of the divisor is zero, the operation has -undefined behavior.
-srem’ Instruction¶The ‘srem’ instruction returns the remainder from the signed
-division of its two operands. This instruction can also take
-vector versions of the values in which case the elements
-must be integers.
The two arguments to the ‘srem’ instruction must be
-integer or vector of integer values. Both
-arguments must have identical types.
This instruction returns the remainder of a division (where the result
-is either zero or has the same sign as the dividend, op1), not the
-modulo operator (where the result is either zero or has the same sign
-as the divisor, op2) of a value. For more information about the
-difference, see The Math
-Forum. For a
-table of how this is implemented in various languages, please see
-Wikipedia: modulo
-operation.
Note that signed integer remainder and unsigned integer remainder are
-distinct operations; for unsigned integer remainder, use ‘urem’.
Taking the remainder of a division by zero is undefined behavior. -For vectors, if any element of the divisor is zero, the operation has -undefined behavior. -Overflow also leads to undefined behavior; this is a rare case, but can -occur, for example, by taking the remainder of a 32-bit division of --2147483648 by -1. (The remainder doesn’t actually overflow, but this -rule lets srem be implemented using instructions that return both the -result of the division and the remainder.)
-frem’ Instruction¶The two arguments to the ‘frem’ instruction must be
-floating-point or vector of
-floating-point values. Both arguments must have identical types.
The value produced is the floating-point remainder of the two operands.
-This is the same output as a libm ‘fmod’ function, but without any
-possibility of setting errno. The remainder has the same sign as the
-dividend.
-This instruction is assumed to execute in the default floating-point
-environment.
-This instruction can also take any number of fast-math
-flags, which are optimization hints to enable otherwise
-unsafe floating-point optimizations:
shl’ Instruction¶<result> = shl <ty> <op1>, <op2> ; yields ty:result
-<result> = shl nuw <ty> <op1>, <op2> ; yields ty:result
-<result> = shl nsw <ty> <op1>, <op2> ; yields ty:result
-<result> = shl nuw nsw <ty> <op1>, <op2> ; yields ty:result
-The ‘shl’ instruction returns the first operand shifted to the left
-a specified number of bits.
Both arguments to the ‘shl’ instruction must be the same
-integer or vector of integer type.
-‘op2’ is treated as an unsigned value.
The value produced is $\mathrm{op1} \cdot 2^{\mathrm{op2}} \bmod 2^{n}$,
-where n is the width of the result. If op2 is (statically or
-dynamically) equal to or larger than the number of bits in
-op1, this instruction returns a poison value.
-If the arguments are vectors, each vector element of op1 is shifted
-by the corresponding shift amount in op2.
If the nuw keyword is present, then the shift produces a poison
-value if it shifts out any non-zero bits.
-If the nsw keyword is present, then the shift produces a poison
-value if it shifts out any bits that disagree with the resultant sign bit.
<result> = shl i32 4, %var ; yields i32: 4 << %var
-<result> = shl i32 4, 2 ; yields i32: 16
-<result> = shl i32 1, 10 ; yields i32: 1024
-<result> = shl i32 1, 32 ; undefined
-<result> = shl <2 x i32> < i32 1, i32 1>, < i32 1, i32 2> ; yields: result=<2 x i32> < i32 2, i32 4>
-lshr’ Instruction¶<result> = lshr <ty> <op1>, <op2> ; yields ty:result
-<result> = lshr exact <ty> <op1>, <op2> ; yields ty:result
-The ‘lshr’ instruction (logical shift right) returns the first
-operand shifted to the right a specified number of bits with zero fill.
Both arguments to the ‘lshr’ instruction must be the same
-integer or vector of integer type.
-‘op2’ is treated as an unsigned value.
This instruction always performs a logical shift right operation. The
-most significant bits of the result will be filled with zero bits after
-the shift. If op2 is (statically or dynamically) equal to or larger
-than the number of bits in op1, this instruction returns a poison
-value. If the arguments are vectors, each vector element
-of op1 is shifted by the corresponding shift amount in op2.
If the exact keyword is present, the result value of the lshr is
-a poison value if any of the bits shifted out are non-zero.
<result> = lshr i32 4, 1 ; yields i32:result = 2
-<result> = lshr i32 4, 2 ; yields i32:result = 1
-<result> = lshr i8 4, 3 ; yields i8:result = 0
-<result> = lshr i8 -2, 1 ; yields i8:result = 0x7F
-<result> = lshr i32 1, 32 ; undefined
-<result> = lshr <2 x i32> < i32 -2, i32 4>, < i32 1, i32 2> ; yields: result=<2 x i32> < i32 0x7FFFFFFF, i32 1>
-ashr’ Instruction¶<result> = ashr <ty> <op1>, <op2> ; yields ty:result
-<result> = ashr exact <ty> <op1>, <op2> ; yields ty:result
-The ‘ashr’ instruction (arithmetic shift right) returns the first
-operand shifted to the right a specified number of bits with sign
-extension.
Both arguments to the ‘ashr’ instruction must be the same
-integer or vector of integer type.
-‘op2’ is treated as an unsigned value.
This instruction always performs an arithmetic shift right operation.
-The most significant bits of the result will be filled with the sign bit
-of op1. If op2 is (statically or dynamically) equal to or larger
-than the number of bits in op1, this instruction returns a poison
-value. If the arguments are vectors, each vector element
-of op1 is shifted by the corresponding shift amount in op2.
If the exact keyword is present, the result value of the ashr is
-a poison value if any of the bits shifted out are non-zero.
<result> = ashr i32 4, 1 ; yields i32:result = 2
-<result> = ashr i32 4, 2 ; yields i32:result = 1
-<result> = ashr i8 4, 3 ; yields i8:result = 0
-<result> = ashr i8 -2, 1 ; yields i8:result = -1
-<result> = ashr i32 1, 32 ; undefined
-<result> = ashr <2 x i32> < i32 -2, i32 4>, < i32 1, i32 3> ; yields: result=<2 x i32> < i32 -1, i32 0>
-and’ Instruction¶The two arguments to the ‘and’ instruction must be
-integer or vector of integer values. Both
-arguments must have identical types.
The truth table used for the ‘and’ instruction is:
| In0 | -In1 | -Out | -
| 0 | -0 | -0 | -
| 0 | -1 | -0 | -
| 1 | -0 | -0 | -
| 1 | -1 | -1 | -
<result> = and i32 4, %var ; yields i32:result = 4 & %var
-<result> = and i32 15, 40 ; yields i32:result = 8
-<result> = and i32 4, 8 ; yields i32:result = 0
-or’ Instruction¶The two arguments to the ‘or’ instruction must be
-integer or vector of integer values. Both
-arguments must have identical types.
The truth table used for the ‘or’ instruction is:
| In0 | -In1 | -Out | -
| 0 | -0 | -0 | -
| 0 | -1 | -1 | -
| 1 | -0 | -1 | -
| 1 | -1 | -1 | -
<result> = or i32 4, %var ; yields i32:result = 4 | %var
-<result> = or i32 15, 40 ; yields i32:result = 47
-<result> = or i32 4, 8 ; yields i32:result = 12
-xor’ Instruction¶The ‘xor’ instruction returns the bitwise logical exclusive or of
-its two operands. The xor is used to implement the “one’s
-complement” operation, which is the “~” operator in C.
The two arguments to the ‘xor’ instruction must be
-integer or vector of integer values. Both
-arguments must have identical types.
The truth table used for the ‘xor’ instruction is:
| In0 | -In1 | -Out | -
| 0 | -0 | -0 | -
| 0 | -1 | -1 | -
| 1 | -0 | -1 | -
| 1 | -1 | -0 | -
<result> = xor i32 4, %var ; yields i32:result = 4 ^ %var
-<result> = xor i32 15, 40 ; yields i32:result = 39
-<result> = xor i32 4, 8 ; yields i32:result = 12
-<result> = xor i32 %V, -1 ; yields i32:result = ~%V
-extractelement’ Instruction¶<result> = extractelement <n x <ty>> <val>, <ty2> <idx> ; yields <ty>
-<result> = extractelement <vscale x n x <ty>> <val>, <ty2> <idx> ; yields <ty>
-The ‘extractelement’ instruction extracts a single scalar element
-from a vector at a specified index.
The first operand of an ‘extractelement’ instruction is a value of
-vector type. The second operand is an index indicating
-the position from which to extract the element. The index may be a
-variable of any integer type.
The result is a scalar of the same type as the element type of val.
-Its value is the value at position idx of val. If idx
-exceeds the length of val for a fixed-length vector, the result is a
-poison value. For a scalable vector, if the value
-of idx exceeds the runtime length of the vector, the result is a
-poison value.
insertelement’ Instruction¶<result> = insertelement <n x <ty>> <val>, <ty> <elt>, <ty2> <idx> ; yields <n x <ty>>
-<result> = insertelement <vscale x n x <ty>> <val>, <ty> <elt>, <ty2> <idx> ; yields <vscale x n x <ty>>
-The ‘insertelement’ instruction inserts a scalar element into a
-vector at a specified index.
The first operand of an ‘insertelement’ instruction is a value of
-vector type. The second operand is a scalar value whose
-type must equal the element type of the first operand. The third operand
-is an index indicating the position at which to insert the value. The
-index may be a variable of any integer type.
The result is a vector of the same type as val. Its element values
-are those of val except at position idx, where it gets the value
-elt. If idx exceeds the length of val for a fixed-length vector,
-the result is a poison value. For a scalable vector,
-if the value of idx exceeds the runtime length of the vector, the result
-is a poison value.
shufflevector’ Instruction¶<result> = shufflevector <n x <ty>> <v1>, <n x <ty>> <v2>, <m x i32> <mask> ; yields <m x <ty>>
-<result> = shufflevector <vscale x n x <ty>> <v1>, <vscale x n x <ty>> v2, <vscale x m x i32> <mask> ; yields <vscale x m x <ty>>
-The ‘shufflevector’ instruction constructs a permutation of elements
-from two input vectors, returning a vector with the same element type as
-the input and length that is the same as the shuffle mask.
The first two operands of a ‘shufflevector’ instruction are vectors
-with the same type. The third argument is a shuffle mask vector constant
-whose element type is i32. The mask vector elements must be constant
-integers or undef values. The result of the instruction is a vector
-whose length is the same as the shuffle mask and whose element type is the
-same as the element type of the first two operands.
The elements of the two input vectors are numbered from left to right -across both of the vectors. For each element of the result vector, the -shuffle mask selects an element from one of the input vectors to copy -to the result. Non-negative elements in the mask represent an index -into the concatenated pair of input vectors.
-If the shuffle mask is undefined, the result vector is undefined. If -the shuffle mask selects an undefined element from one of the input -vectors, the resulting element is undefined. An undefined element -in the mask vector specifies that the resulting element is undefined. -An undefined element in the mask vector prevents a poisoned vector -element from propagating.
-For scalable vectors, the only valid mask values at present are
-zeroinitializer and undef, since we cannot write all indices as
-literals for a vector with a length unknown at compile time.
<result> = shufflevector <4 x i32> %v1, <4 x i32> %v2,
- <4 x i32> <i32 0, i32 4, i32 1, i32 5> ; yields <4 x i32>
-<result> = shufflevector <4 x i32> %v1, <4 x i32> undef,
- <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; yields <4 x i32> - Identity shuffle.
-<result> = shufflevector <8 x i32> %v1, <8 x i32> undef,
- <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; yields <4 x i32>
-<result> = shufflevector <4 x i32> %v1, <4 x i32> %v2,
- <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 > ; yields <8 x i32>
-extractvalue’ Instruction¶The ‘extractvalue’ instruction extracts the value of a member field
-from an aggregate value.
The first operand of an ‘extractvalue’ instruction is a value of
-struct or array type. The other operands are
-constant indices to specify which value to extract in a similar manner
-as indices in a ‘getelementptr’ instruction.
The major differences to getelementptr indexing are:
The result is the value at the position in the aggregate specified by -the index operands.
-insertvalue’ Instruction¶<result> = insertvalue <aggregate type> <val>, <ty> <elt>, <idx>{, <idx>}* ; yields <aggregate type>
-The ‘insertvalue’ instruction inserts a value into a member field in
-an aggregate value.
The first operand of an ‘insertvalue’ instruction is a value of
-struct or array type. The second operand is
-a first-class value to insert. The following operands are constant
-indices indicating the position at which to insert the value in a
-similar manner as indices in a ‘extractvalue’ instruction. The value
-to insert must have the same type as the value identified by the
-indices.
The result is an aggregate of the same type as val. Its value is
-that of val except that the value at the position specified by the
-indices is that of elt.
%agg1 = insertvalue {i32, float} undef, i32 1, 0 ; yields {i32 1, float undef}
-%agg2 = insertvalue {i32, float} %agg1, float %val, 1 ; yields {i32 1, float %val}
-%agg3 = insertvalue {i32, {float}} undef, float %val, 1, 0 ; yields {i32 undef, {float %val}}
-alloca’ Instruction¶<result> = alloca [inalloca] <type> [, <ty> <NumElements>] [, align <alignment>] [, addrspace(<num>)] ; yields type addrspace(num)*:result
-The ‘alloca’ instruction allocates memory on the stack frame of the
-currently executing function, to be automatically released when this
-function returns to its caller. If the address space is not explicitly
-specified, the object is allocated in the alloca address space from the
-datalayout string.
The ‘alloca’ instruction allocates sizeof(<type>)*NumElements
-bytes of memory on the runtime stack, returning a pointer of the
-appropriate type to the program. If “NumElements” is specified, it is
-the number of elements allocated, otherwise “NumElements” is defaulted
-to be one. If a constant alignment is specified, the value result of the
-allocation is guaranteed to be aligned to at least that boundary. The
-alignment may not be greater than 1 << 32. If not specified, or if
-zero, the target can choose to align the allocation on any convenient
-boundary compatible with the type.
‘type’ may be any sized type.
Memory is allocated; a pointer is returned. The allocated memory is
-uninitialized, and loading from uninitialized memory produces an undefined
-value. The operation itself is undefined if there is insufficient stack
-space for the allocation. ‘alloca’d memory is automatically released
-when the function returns. The ‘alloca’ instruction is commonly used
-to represent automatic variables that must have an address available. When
-the function returns (either with the ret or resume instructions),
-the memory is reclaimed. Allocating zero bytes is legal, but the returned
-pointer may not be unique. The order in which memory is allocated (i.e.,
-which way the stack grows) is not specified.
Note that ‘alloca’ outside of the alloca address space from the
-datalayout string is meaningful only if the
-target has assigned it a semantics.
If the returned pointer is used by llvm.lifetime.start, -the returned object is initially dead. -See llvm.lifetime.start and -llvm.lifetime.end for the precise semantics of -lifetime-manipulating intrinsics.
-%ptr = alloca i32 ; yields i32*:ptr
-%ptr = alloca i32, i32 4 ; yields i32*:ptr
-%ptr = alloca i32, i32 4, align 1024 ; yields i32*:ptr
-%ptr = alloca i32, align 1024 ; yields i32*:ptr
-load’ Instruction¶<result> = load [volatile] <ty>, <ty>* <pointer>[, align <alignment>][, !nontemporal !<nontemp_node>][, !invariant.load !<empty_node>][, !invariant.group !<empty_node>][, !nonnull !<empty_node>][, !dereferenceable !<deref_bytes_node>][, !dereferenceable_or_null !<deref_bytes_node>][, !align !<align_node>][, !noundef !<empty_node>]
-<result> = load atomic [volatile] <ty>, <ty>* <pointer> [syncscope("<target-scope>")] <ordering>, align <alignment> [, !invariant.group !<empty_node>]
-!<nontemp_node> = !{ i32 1 }
-!<empty_node> = !{}
-!<deref_bytes_node> = !{ i64 <dereferenceable_bytes> }
-!<align_node> = !{ i64 <value_alignment> }
-The argument to the load instruction specifies the memory address from which
-to load. The type specified must be a first class type of
-known size (i.e. not containing an opaque structural type). If
-the load is marked as volatile, then the optimizer is not allowed to
-modify the number or order of execution of this load with other
-volatile operations.
If the load is marked as atomic, it takes an extra ordering and optional syncscope("<target-scope>") argument. The
-release and acq_rel orderings are not valid on load instructions.
-Atomic loads produce defined results when they may see
-multiple atomic stores. The type of the pointee must be an integer, pointer, or
-floating-point type whose bit width is a power of two greater than or equal to
-eight and less than or equal to a target-specific size limit. align must be
-explicitly specified on atomic loads, and the load has undefined behavior if the
-alignment is not set to a value which is at least the size in bytes of the
-pointee. !nontemporal does not have any defined semantics for atomic loads.
The optional constant align argument specifies the alignment of the
-operation (that is, the alignment of the memory address). A value of 0
-or an omitted align argument means that the operation has the ABI
-alignment for the target. It is the responsibility of the code emitter
-to ensure that the alignment information is correct. Overestimating the
-alignment results in undefined behavior. Underestimating the alignment
-may produce less efficient code. An alignment of 1 is always safe. The
-maximum possible alignment is 1 << 32. An alignment value higher
-than the size of the loaded type implies memory up to the alignment
-value bytes can be safely loaded without trapping in the default
-address space. Access of the high bytes can interfere with debugging
-tools, so should not be accessed if the function has the
-sanitize_thread or sanitize_address attributes.
The optional !nontemporal metadata must reference a single
-metadata name <nontemp_node> corresponding to a metadata node with one
-i32 entry of value 1. The existence of the !nontemporal
-metadata on the instruction tells the optimizer and code generator
-that this load is not expected to be reused in the cache. The code
-generator may select special instructions to save cache bandwidth, such
-as the MOVNT instruction on x86.
The optional !invariant.load metadata must reference a single
-metadata name <empty_node> corresponding to a metadata node with no
-entries. If a load instruction tagged with the !invariant.load
-metadata is executed, the memory location referenced by the load has
-to contain the same value at all points in the program where the
-memory location is dereferenceable; otherwise, the behavior is
-undefined.
The optional !invariant.group metadata must reference a single metadata name <empty_node> corresponding to a metadata node with no entries.
-See invariant.group metadata invariant.group. The optional !nonnull metadata must reference a single
-metadata name <empty_node> corresponding to a metadata node with no
-entries. The existence of the !nonnull metadata on the
-instruction tells the optimizer that the value loaded is known to
-never be null. If the value is null at runtime, the behavior is undefined.
-This is analogous to the nonnull attribute on parameters and return
-values. This metadata can only be applied to loads of a pointer type.
The optional !dereferenceable metadata must reference a single metadata
-name <deref_bytes_node> corresponding to a metadata node with one i64
-entry.
-See dereferenceable metadata dereferenceable.
The optional !dereferenceable_or_null metadata must reference a single
-metadata name <deref_bytes_node> corresponding to a metadata node with one
-i64 entry.
-See dereferenceable_or_null metadata dereferenceable_or_null.
The optional !align metadata must reference a single metadata name
-<align_node> corresponding to a metadata node with one i64 entry.
-The existence of the !align metadata on the instruction tells the
-optimizer that the value loaded is known to be aligned to a boundary specified
-by the integer value in the metadata node. The alignment must be a power of 2.
-This is analogous to the ‘align’ attribute on parameters and return values.
-This metadata can only be applied to loads of a pointer type. If the returned
-value is not appropriately aligned at runtime, the behavior is undefined.
The optional !noundef metadata must reference a single metadata name
-<empty_node> corresponding to a node with no entries. The existence of
-!noundef metadata on the instruction tells the optimizer that the value
-loaded is known to be well defined.
-If the value isn’t well defined, the behavior is undefined.
The location of memory pointed to is loaded. If the value being loaded
-is of scalar type then the number of bytes read does not exceed the
-minimum number of bytes needed to hold all bits of the type. For
-example, loading an i24 reads at most three bytes. When loading a
-value of a type like i20 with a size that is not an integral number
-of bytes, the result is undefined if the value was not originally
-written using a store of the same type.
-If the value being loaded is of aggregate type, the bytes that correspond to
-padding may be accessed but are ignored, because it is impossible to observe
-padding from the loaded aggregate value.
-If <pointer> is not a well-defined value, the behavior is undefined.
%ptr = alloca i32 ; yields i32*:ptr
-store i32 3, i32* %ptr ; yields void
-%val = load i32, i32* %ptr ; yields i32:val = i32 3
-store’ Instruction¶store [volatile] <ty> <value>, <ty>* <pointer>[, align <alignment>][, !nontemporal !<nontemp_node>][, !invariant.group !<empty_node>] ; yields void
-store atomic [volatile] <ty> <value>, <ty>* <pointer> [syncscope("<target-scope>")] <ordering>, align <alignment> [, !invariant.group !<empty_node>] ; yields void
-!<nontemp_node> = !{ i32 1 }
-!<empty_node> = !{}
-There are two arguments to the store instruction: a value to store and an
-address at which to store it. The type of the <pointer> operand must be a
-pointer to the first class type of the <value>
-operand. If the store is marked as volatile, then the optimizer is not
-allowed to modify the number or order of execution of this store with other
-volatile operations. Only values of first class types of known size (i.e. not containing an opaque
-structural type) can be stored.
If the store is marked as atomic, it takes an extra ordering and optional syncscope("<target-scope>") argument. The
-acquire and acq_rel orderings aren’t valid on store instructions.
-Atomic loads produce defined results when they may see
-multiple atomic stores. The type of the pointee must be an integer, pointer, or
-floating-point type whose bit width is a power of two greater than or equal to
-eight and less than or equal to a target-specific size limit. align must be
-explicitly specified on atomic stores, and the store has undefined behavior if
-the alignment is not set to a value which is at least the size in bytes of the
-pointee. !nontemporal does not have any defined semantics for atomic stores.
The optional constant align argument specifies the alignment of the
-operation (that is, the alignment of the memory address). A value of 0
-or an omitted align argument means that the operation has the ABI
-alignment for the target. It is the responsibility of the code emitter
-to ensure that the alignment information is correct. Overestimating the
-alignment results in undefined behavior. Underestimating the
-alignment may produce less efficient code. An alignment of 1 is always
-safe. The maximum possible alignment is 1 << 32. An alignment
-value higher than the size of the stored type implies memory up to the
-alignment value bytes can be stored to without trapping in the default
-address space. Storing to the higher bytes however may result in data
-races if another thread can access the same address. Introducing a
-data race is not allowed. Storing to the extra bytes is not allowed
-even in situations where a data race is known to not exist if the
-function has the sanitize_address attribute.
The optional !nontemporal metadata must reference a single metadata
-name <nontemp_node> corresponding to a metadata node with one i32 entry
-of value 1. The existence of the !nontemporal metadata on the instruction
-tells the optimizer and code generator that this store is not expected to
-be reused in the cache. The code generator may select special
-instructions to save cache bandwidth, such as the MOVNT instruction on
-x86.
The optional !invariant.group metadata must reference a
-single metadata name <empty_node>. See invariant.group metadata.
The contents of memory are updated to contain <value> at the
-location specified by the <pointer> operand. If <value> is
-of scalar type then the number of bytes written does not exceed the
-minimum number of bytes needed to hold all bits of the type. For
-example, storing an i24 writes at most three bytes. When writing a
-value of a type like i20 with a size that is not an integral number
-of bytes, it is unspecified what happens to the extra bits that do not
-belong to the type, but they will typically be overwritten.
-If <value> is of aggregate type, padding is filled with
-undef.
-If <pointer> is not a well-defined value, the behavior is undefined.
%ptr = alloca i32 ; yields i32*:ptr
-store i32 3, i32* %ptr ; yields void
-%val = load i32, i32* %ptr ; yields i32:val = i32 3
-fence’ Instruction¶The ‘fence’ instruction is used to introduce happens-before edges
-between operations.
‘fence’ instructions take an ordering argument which
-defines what synchronizes-with edges they add. They can only be given
-acquire, release, acq_rel, and seq_cst orderings.
A fence A which has (at least) release ordering semantics
-synchronizes with a fence B with (at least) acquire ordering
-semantics if and only if there exist atomic operations X and Y, both
-operating on some atomic object M, such that A is sequenced before X, X
-modifies M (either directly or through some side effect of a sequence
-headed by X), Y is sequenced before B, and Y observes M. This provides a
-happens-before dependency between A and B. Rather than an explicit
-fence, one (but not both) of the atomic operations X or Y might
-provide a release or acquire (resp.) ordering constraint and
-still synchronize-with the explicit fence and establish the
-happens-before edge.
A fence which has seq_cst ordering, in addition to having both
-acquire and release semantics specified above, participates in
-the global program order of other seq_cst operations and/or fences.
A fence instruction can also take an optional
-“syncscope” argument.
fence acquire ; yields void
-fence syncscope("singlethread") seq_cst ; yields void
-fence syncscope("agent") seq_cst ; yields void
-cmpxchg’ Instruction¶cmpxchg [weak] [volatile] <ty>* <pointer>, <ty> <cmp>, <ty> <new> [syncscope("<target-scope>")] <success ordering> <failure ordering>[, align <alignment>] ; yields { ty, i1 }
-The ‘cmpxchg’ instruction is used to atomically modify memory. It
-loads a value in memory and compares it to a given value. If they are
-equal, it tries to store a new value into the memory.
There are three arguments to the ‘cmpxchg’ instruction: an address
-to operate on, a value to compare to the value currently at that
-address, and a new value to place at that address if the compared values
-are equal. The type of ‘<cmp>’ must be an integer or pointer type whose
-bit width is a power of two greater than or equal to eight and less
-than or equal to a target-specific size limit. ‘<cmp>’ and ‘<new>’ must
-have the same type, and the type of ‘<pointer>’ must be a pointer to
-that type. If the cmpxchg is marked as volatile, then the
-optimizer is not allowed to modify the number or order of execution of
-this cmpxchg with other volatile operations.
The success and failure ordering arguments specify how this
-cmpxchg synchronizes with other atomic operations. Both ordering parameters
-must be at least monotonic, the failure ordering cannot be either
-release or acq_rel.
A cmpxchg instruction can also take an optional
-“syncscope” argument.
The instruction can take an optional align attribute.
-The alignment must be a power of two greater than or equal to the size of the
-<value> type. If unspecified, the alignment is assumed to be equal to the
-size of the ‘<value>’ type. Note that this default alignment assumption is
-different from the alignment used for the load/store instructions when align
-isn’t specified.
The pointer passed into cmpxchg must have alignment greater than or -equal to the size in memory of the operand.
-The contents of memory at the location specified by the ‘<pointer>’ operand
-is read and compared to ‘<cmp>’; if the values are equal, ‘<new>’ is
-written to the location. The original value at the location is returned,
-together with a flag indicating success (true) or failure (false).
If the cmpxchg operation is marked as weak then a spurious failure is
-permitted: the operation may not write <new> even if the comparison
-matched.
If the cmpxchg operation is strong (the default), the i1 value is 1 if and only
-if the value loaded equals cmp.
A successful cmpxchg is a read-modify-write instruction for the purpose of
-identifying release sequences. A failed cmpxchg is equivalent to an atomic
-load with an ordering parameter determined by the second ordering parameter.
entry:
- %orig = load atomic i32, i32* %ptr unordered, align 4 ; yields i32
- br label %loop
-
-loop:
- %cmp = phi i32 [ %orig, %entry ], [%value_loaded, %loop]
- %squared = mul i32 %cmp, %cmp
- %val_success = cmpxchg i32* %ptr, i32 %cmp, i32 %squared acq_rel monotonic ; yields { i32, i1 }
- %value_loaded = extractvalue { i32, i1 } %val_success, 0
- %success = extractvalue { i32, i1 } %val_success, 1
- br i1 %success, label %done, label %loop
-
-done:
- ...
-atomicrmw’ Instruction¶atomicrmw [volatile] <operation> <ty>* <pointer>, <ty> <value> [syncscope("<target-scope>")] <ordering>[, align <alignment>] ; yields ty
-There are three arguments to the ‘atomicrmw’ instruction: an
-operation to apply, an address whose value to modify, an argument to the
-operation. The operation must be one of the following keywords:
For most of these operations, the type of ‘<value>’ must be an integer
-type whose bit width is a power of two greater than or equal to eight
-and less than or equal to a target-specific size limit. For xchg, this
-may also be a floating point or a pointer type with the same size constraints
-as integers. For fadd/fsub, this must be a floating point type. The
-type of the ‘<pointer>’ operand must be a pointer to that type. If
-the atomicrmw is marked as volatile, then the optimizer is not
-allowed to modify the number or order of execution of this
-atomicrmw with other volatile operations.
The instruction can take an optional align attribute.
-The alignment must be a power of two greater than or equal to the size of the
-<value> type. If unspecified, the alignment is assumed to be equal to the
-size of the ‘<value>’ type. Note that this default alignment assumption is
-different from the alignment used for the load/store instructions when align
-isn’t specified.
An atomicrmw instruction can also take an optional
-“syncscope” argument.
The contents of memory at the location specified by the ‘<pointer>’
-operand are atomically read, modified, and written back. The original
-value at the location is returned. The modification is specified by the
-operation argument:
xchg: *ptr = val; add: *ptr = *ptr + val; sub: *ptr = *ptr - val; and: *ptr = *ptr & val; nand: *ptr = ~(*ptr & val); or: *ptr = *ptr | val; xor: *ptr = *ptr ^ val; max: *ptr = *ptr > val ? *ptr : val (using a signed comparison); min: *ptr = *ptr < val ? *ptr : val (using a signed comparison); umax: *ptr = *ptr > val ? *ptr : val (using an unsigned comparison); umin: *ptr = *ptr < val ? *ptr : val (using an unsigned comparison); fadd: *ptr = *ptr + val (using floating point arithmetic); fsub: *ptr = *ptr - val (using floating point arithmetic). getelementptr’ Instruction¶<result> = getelementptr <ty>, <ty>* <ptrval>{, [inrange] <ty> <idx>}*
-<result> = getelementptr inbounds <ty>, <ty>* <ptrval>{, [inrange] <ty> <idx>}*
-<result> = getelementptr <ty>, <ptr vector> <ptrval>, [inrange] <vector index type> <idx>
-The ‘getelementptr’ instruction is used to get the address of a
-subelement of an aggregate data structure. It performs
-address calculation only and does not access memory. The instruction can also
-be used to calculate a vector of such addresses.
The first argument is always a type used as the basis for the calculations. -The second argument is always a pointer or a vector of pointers, and is the -base address to start from. The remaining arguments are indices -that indicate which of the elements of the aggregate object are indexed. -The interpretation of each index is dependent on the type being indexed -into. The first index always indexes the pointer value given as the -second argument, the second index indexes a value of the type pointed to -(not necessarily the value directly pointed to, since the first index -can be non-zero), etc. The first type indexed into must be a pointer -value, subsequent types can be arrays, vectors, and structs. Note that -subsequent types being indexed into can never be pointers, since that -would require loading the pointer before continuing calculation.
-The type of each index argument depends on the type it is indexing into.
-When indexing into a (optionally packed) structure, only i32 integer
-constants are allowed (when using a vector of indices they must all
-be the same i32 integer constant). When indexing into an array,
-pointer or vector, integers of any width are allowed, and they are not
-required to be constant. These integers are treated as signed values
-where relevant.
For example, let’s consider a C code fragment and how it gets compiled -to LLVM:
-struct RT {
- char A;
- int B[10][20];
- char C;
-};
-struct ST {
- int X;
- double Y;
- struct RT Z;
-};
-
-int *foo(struct ST *s) {
- return &s[1].Z.B[5][13];
-}
-The LLVM code generated by Clang is:
-%struct.RT = type { i8, [10 x [20 x i32]], i8 }
-%struct.ST = type { i32, double, %struct.RT }
-
-define i32* @foo(%struct.ST* %s) nounwind uwtable readnone optsize ssp {
-entry:
- %arrayidx = getelementptr inbounds %struct.ST, %struct.ST* %s, i64 1, i32 2, i32 1, i64 5, i64 13
- ret i32* %arrayidx
-}
-In the example above, the first index is indexing into the
-‘%struct.ST*’ type, which is a pointer, yielding a ‘%struct.ST’
-= ‘{ i32, double, %struct.RT }’ type, a structure. The second index
-indexes into the third element of the structure, yielding a
-‘%struct.RT’ = ‘{ i8 , [10 x [20 x i32]], i8 }’ type, another
-structure. The third index indexes into the second element of the
-structure, yielding a ‘[10 x [20 x i32]]’ type, an array. The two
-dimensions of the array are subscripted into, yielding an ‘i32’
-type. The ‘getelementptr’ instruction returns a pointer to this
-element, thus computing a value of ‘i32*’ type.
Note that it is perfectly legal to index partially through a structure, -returning a pointer to an inner element. Because of this, the LLVM code -for the given testcase is equivalent to:
-define i32* @foo(%struct.ST* %s) {
- %t1 = getelementptr %struct.ST, %struct.ST* %s, i32 1 ; yields %struct.ST*:%t1
- %t2 = getelementptr %struct.ST, %struct.ST* %t1, i32 0, i32 2 ; yields %struct.RT*:%t2
- %t3 = getelementptr %struct.RT, %struct.RT* %t2, i32 0, i32 1 ; yields [10 x [20 x i32]]*:%t3
- %t4 = getelementptr [10 x [20 x i32]], [10 x [20 x i32]]* %t3, i32 0, i32 5 ; yields [20 x i32]*:%t4
- %t5 = getelementptr [20 x i32], [20 x i32]* %t4, i32 0, i32 13 ; yields i32*:%t5
- ret i32* %t5
-}
-If the inbounds keyword is present, the result value of the
-getelementptr is a poison value if one of the
-following rules is violated:
nsw).nsw).nuw).inbounds keyword
-applies to each of the computations element-wise.These rules are based on the assumption that no allocated object may cross -the unsigned address space boundary, and no allocated object may be larger -than half the pointer index type space.
-If the inbounds keyword is not present, the offsets are added to the
-base address with silently-wrapping two’s complement arithmetic. If the
-offsets have a different width from the pointer, they are sign-extended
-or truncated to the width of the pointer. The result value of the
-getelementptr may be outside the object pointed to by the base
-pointer. The result value may not necessarily be used to access memory
-though, even if it happens to point into allocated storage. See the
-Pointer Aliasing Rules section for more
-information.
If the inrange keyword is present before any index, loading from or
-storing to any pointer derived from the getelementptr has undefined
-behavior if the load or store would access memory outside of the bounds of
-the element selected by the index marked as inrange. The result of a
-pointer comparison or ptrtoint (including ptrtoint-like operations
-involving memory) involving a pointer derived from a getelementptr with
-the inrange keyword is undefined, with the exception of comparisons
-in the case where both operands are in the range of the element selected
-by the inrange keyword, inclusive of the address one past the end of
-that element. Note that the inrange keyword is currently only allowed
-in constant getelementptr expressions.
The getelementptr instruction is often confusing. For some more insight -into how it works, see the getelementptr FAQ.
-; yields [12 x i8]*:aptr
-%aptr = getelementptr {i32, [12 x i8]}, {i32, [12 x i8]}* %saptr, i64 0, i32 1
-; yields i8*:vptr
-%vptr = getelementptr {i32, <2 x i8>}, {i32, <2 x i8>}* %svptr, i64 0, i32 1, i32 1
-; yields i8*:eptr
-%eptr = getelementptr [12 x i8], [12 x i8]* %aptr, i64 0, i32 1
-; yields i32*:iptr
-%iptr = getelementptr [10 x i32], [10 x i32]* @arr, i16 0, i16 0
-The getelementptr returns a vector of pointers, instead of a single address,
-when one or more of its arguments is a vector. In such cases, all vector
-arguments should have the same number of elements, and every scalar argument
-will be effectively broadcast into a vector during address calculation.
; All arguments are vectors:
-; A[i] = ptrs[i] + offsets[i]*sizeof(i8)
-%A = getelementptr i8, <4 x i8*> %ptrs, <4 x i64> %offsets
-
-; Add the same scalar offset to each pointer of a vector:
-; A[i] = ptrs[i] + offset*sizeof(i8)
-%A = getelementptr i8, <4 x i8*> %ptrs, i64 %offset
-
-; Add distinct offsets to the same pointer:
-; A[i] = ptr + offsets[i]*sizeof(i8)
-%A = getelementptr i8, i8* %ptr, <4 x i64> %offsets
-
-; In all cases described above the type of the result is <4 x i8*>
-The two following instructions are equivalent:
-getelementptr %struct.ST, <4 x %struct.ST*> %s, <4 x i64> %ind1,
- <4 x i32> <i32 2, i32 2, i32 2, i32 2>,
- <4 x i32> <i32 1, i32 1, i32 1, i32 1>,
- <4 x i32> %ind4,
- <4 x i64> <i64 13, i64 13, i64 13, i64 13>
-
-getelementptr %struct.ST, <4 x %struct.ST*> %s, <4 x i64> %ind1,
- i32 2, i32 1, <4 x i32> %ind4, i64 13
-Let’s look at the C code, where the vector version of getelementptr
-makes sense:
// Let's assume that we vectorize the following loop:
-double *A, *B; int *C;
-for (int i = 0; i < size; ++i) {
- A[i] = B[C[i]];
-}
-; get pointers for 8 elements from array B
-%ptrs = getelementptr double, double* %B, <8 x i32> %C
-; load 8 elements from array B into A
-%A = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> %ptrs,
- i32 8, <8 x i1> %mask, <8 x double> %passthru)
-trunc .. to’ Instruction¶The ‘trunc’ instruction takes a value to trunc, and a type to trunc
-it to. Both types must be of integer types, or vectors
-of the same number of integers. The bit size of the value must be
-larger than the bit size of the destination type, ty2. Equal sized
-types are not allowed.
The ‘trunc’ instruction truncates the high order bits in value
-and converts the remaining bits to ty2. Since the source size must
-be larger than the destination size, trunc cannot be a no-op cast.
-It will always truncate bits.
%X = trunc i32 257 to i8 ; yields i8:1
-%Y = trunc i32 123 to i1 ; yields i1:true
-%Z = trunc i32 122 to i1 ; yields i1:false
-%W = trunc <2 x i16> <i16 8, i16 7> to <2 x i8> ; yields <i8 8, i8 7>
-zext .. to’ Instruction¶The ‘zext’ instruction takes a value to cast, and a type to cast it
-to. Both types must be of integer types, or vectors of
-the same number of integers. The bit size of the value must be
-smaller than the bit size of the destination type, ty2.
The zext fills the high order bits of the value with zero bits
-until it reaches the size of the destination type, ty2.
When zero extending from i1, the result will always be either 0 or 1.
-%X = zext i32 257 to i64 ; yields i64:257
-%Y = zext i1 true to i32 ; yields i32:1
-%Z = zext <2 x i16> <i16 8, i16 7> to <2 x i32> ; yields <i32 8, i32 7>
-sext .. to’ Instruction¶The ‘sext’ instruction takes a value to cast, and a type to cast it
-to. Both types must be of integer types, or vectors of
-the same number of integers. The bit size of the value must be
-smaller than the bit size of the destination type, ty2.
The ‘sext’ instruction performs a sign extension by copying the sign
-bit (highest order bit) of the value until it reaches the bit size
-of the type ty2.
When sign extending from i1, the extension always results in -1 or 0.
-%X = sext i8 -1 to i16 ; yields i16 :65535
-%Y = sext i1 true to i32 ; yields i32:-1
-%Z = sext <2 x i16> <i16 8, i16 7> to <2 x i32> ; yields <i32 8, i32 7>
-fptrunc .. to’ Instruction¶The ‘fptrunc’ instruction takes a floating-point
-value to cast and a floating-point type to cast it to.
-The size of value must be larger than the size of ty2. This
-implies that fptrunc cannot be used to make a no-op cast.
The ‘fptrunc’ instruction casts a value from a larger
-floating-point type to a smaller floating-point type.
-This instruction is assumed to execute in the default floating-point
-environment.
%X = fptrunc double 16777217.0 to float ; yields float:16777216.0
-%Y = fptrunc double 1.0E+300 to half ; yields half:+infinity
-fpext .. to’ Instruction¶The ‘fpext’ instruction takes a floating-point
-value to cast, and a floating-point type to cast it
-to. The source type must be smaller than the destination type.
The ‘fpext’ instruction extends the value from a smaller
-floating-point type to a larger floating-point type. The fpext cannot be used to make a
-no-op cast because it always changes bits. Use bitcast to make a
-no-op cast for a floating-point cast.
%X = fpext float 3.125 to double ; yields double:3.125000e+00
-%Y = fpext double %X to fp128 ; yields fp128:0xL00000000000000004000900000000000
-fptoui .. to’ Instruction¶The ‘fptoui’ converts a floating-point value to its unsigned
-integer equivalent of type ty2.
The ‘fptoui’ instruction takes a value to cast, which must be a
-scalar or vector floating-point value, and a type to
-cast it to ty2, which must be an integer type. If
-ty is a vector floating-point type, ty2 must be a vector integer
-type with the same number of elements as ty.
The ‘fptoui’ instruction converts its floating-point operand into the nearest (rounding towards zero)
-unsigned integer value. If the value cannot fit in ty2, the result
-is a poison value.
%X = fptoui double 123.0 to i32 ; yields i32:123
-%Y = fptoui float 1.0E+300 to i1 ; yields undefined:1
-%Z = fptoui float 1.04E+17 to i8 ; yields undefined:1
-fptosi .. to’ Instruction¶The ‘fptosi’ instruction converts floating-point
-value to type ty2.
The ‘fptosi’ instruction takes a value to cast, which must be a
-scalar or vector floating-point value, and a type to
-cast it to ty2, which must be an integer type. If
-ty is a vector floating-point type, ty2 must be a vector integer
-type with the same number of elements as ty.
The ‘fptosi’ instruction converts its floating-point operand into the nearest (rounding towards zero)
-signed integer value. If the value cannot fit in ty2, the result
-is a poison value.
%X = fptosi double -123.0 to i32 ; yields i32:-123
-%Y = fptosi float 1.0E-247 to i1 ; yields undefined:1
-%Z = fptosi float 1.04E+17 to i8 ; yields undefined:1
-uitofp .. to’ Instruction¶The ‘uitofp’ instruction regards value as an unsigned integer
-and converts that value to the ty2 type.
The ‘uitofp’ instruction takes a value to cast, which must be a
-scalar or vector integer value, and a type to cast it to
-ty2, which must be a floating-point type. If
-ty is a vector integer type, ty2 must be a vector floating-point
-type with the same number of elements as ty.
The ‘uitofp’ instruction interprets its operand as an unsigned
-integer quantity and converts it to the corresponding floating-point
-value. If the value cannot be exactly represented, it is rounded using
-the default rounding mode.
%X = uitofp i32 257 to float ; yields float:257.0
-%Y = uitofp i8 -1 to double ; yields double:255.0
-sitofp .. to’ Instruction¶The ‘sitofp’ instruction regards value as a signed integer and
-converts that value to the ty2 type.
The ‘sitofp’ instruction takes a value to cast, which must be a
-scalar or vector integer value, and a type to cast it to
-ty2, which must be a floating-point type. If
-ty is a vector integer type, ty2 must be a vector floating-point
-type with the same number of elements as ty.
The ‘sitofp’ instruction interprets its operand as a signed integer
-quantity and converts it to the corresponding floating-point value. If the
-value cannot be exactly represented, it is rounded using the default rounding
-mode.
%X = sitofp i32 257 to float ; yields float:257.0
-%Y = sitofp i8 -1 to double ; yields double:-1.0
-ptrtoint .. to’ Instruction¶The ‘ptrtoint’ instruction converts the pointer or a vector of
-pointers value to the integer (or vector of integers) type ty2.
The ‘ptrtoint’ instruction takes a value to cast, which must be
-a value of type pointer or a vector of pointers, and a
-type to cast it to ty2, which must be an integer or
-a vector of integers type.
The ‘ptrtoint’ instruction converts value to integer type
-ty2 by interpreting the pointer value as an integer and either
-truncating or zero extending that value to the size of the integer type.
-If value is smaller than ty2 then a zero extension is done. If
-value is larger than ty2 then a truncation is done. If they are
-the same size, then nothing is done (no-op cast) other than a type
-change.
%X = ptrtoint i32* %P to i8 ; yields truncation on 32-bit architecture
-%Y = ptrtoint i32* %P to i64 ; yields zero extension on 32-bit architecture
-%Z = ptrtoint <4 x i32*> %P to <4 x i64>; yields vector zero extension for a vector of addresses on 32-bit architecture
-inttoptr .. to’ Instruction¶<result> = inttoptr <ty> <value> to <ty2>[, !dereferenceable !<deref_bytes_node>][, !dereferenceable_or_null !<deref_bytes_node>] ; yields ty2
-The ‘inttoptr’ instruction takes an integer value to
-cast, and a type to cast it to, which must be a pointer
-type.
The optional !dereferenceable metadata must reference a single metadata
-name <deref_bytes_node> corresponding to a metadata node with one i64
-entry.
-See dereferenceable metadata.
The optional !dereferenceable_or_null metadata must reference a single
-metadata name <deref_bytes_node> corresponding to a metadata node with one
-i64 entry.
-See dereferenceable_or_null metadata.
The ‘inttoptr’ instruction converts value to type ty2 by
-applying either a zero extension or a truncation depending on the size
-of the integer value. If value is larger than the size of a
-pointer then a truncation is done. If value is smaller than the size
-of a pointer then a zero extension is done. If they are the same size,
-nothing is done (no-op cast).
%X = inttoptr i32 255 to i32* ; yields zero extension on 64-bit architecture
-%Y = inttoptr i32 255 to i32* ; yields no-op on 32-bit architecture
-%Z = inttoptr i64 0 to i32* ; yields truncation on 32-bit architecture
-%Z = inttoptr <4 x i32> %G to <4 x i8*>; yields truncation of vector G to four pointers
-bitcast .. to’ Instruction¶The ‘bitcast’ instruction takes a value to cast, which must be a
-non-aggregate first class value, and a type to cast it to, which must
-also be a non-aggregate first class type. The
-bit sizes of value and the destination type, ty2, must be
-identical. If the source type is a pointer, the destination type must
-also be a pointer of the same size. This instruction supports bitwise
-conversion of vectors to integers and to vectors of other types (as
-long as they have the same size).
The ‘bitcast’ instruction converts value to type ty2. It
-is always a no-op cast because no bits change with this
-conversion. The conversion is done as if the value had been stored
-to memory and read back as type ty2. Pointer (or vector of
-pointers) types may only be converted to other pointer (or vector of
-pointers) types with the same address space through this instruction.
-To convert pointers to other types, use the inttoptr
-or ptrtoint instructions first.
There is a caveat for bitcasts involving vector types in relation to
-endianness. For example bitcast <2 x i8> <value> to i16 puts element zero
-of the vector in the least significant bits of the i16 for little-endian while
-element zero ends up in the most significant bits for big-endian.
%X = bitcast i8 255 to i8 ; yields i8 :-1
-%Y = bitcast i32* %x to i16* ; yields i16*:%x
-%Z = bitcast <2 x i32> %V to i64; ; yields i64: %V (depends on endianness)
-%Z = bitcast <2 x i32*> %V to <2 x i64*> ; yields <2 x i64*>
-addrspacecast .. to’ Instruction¶The ‘addrspacecast’ instruction converts ptrval from pty in
-address space n to type pty2 in address space m.
The ‘addrspacecast’ instruction takes a pointer or vector of pointer value
-to cast and a pointer type to cast it to, which must have a different
-address space.
The ‘addrspacecast’ instruction converts the pointer value
-ptrval to type pty2. It can be a no-op cast or a complex
-value modification, depending on the target and the address space
-pair. Pointer conversions within the same address space must be
-performed with the bitcast instruction. Note that if the address space
-conversion is legal then both result and operand refer to the same memory
-location.
%X = addrspacecast i32* %x to i32 addrspace(1)* ; yields i32 addrspace(1)*:%x
-%Y = addrspacecast i32 addrspace(1)* %y to i64 addrspace(2)* ; yields i64 addrspace(2)*:%y
-%Z = addrspacecast <4 x i32*> %z to <4 x float addrspace(3)*> ; yields <4 x float addrspace(3)*>:%z
-icmp’ Instruction¶The ‘icmp’ instruction returns a boolean value or a vector of
-boolean values based on comparison of its two integer, integer vector,
-pointer, or pointer vector operands.
The ‘icmp’ instruction takes three operands. The first operand is
-the condition code indicating the kind of comparison to perform. It is
-not a value, just a keyword. The possible condition codes are:
eq: equalne: not equalugt: unsigned greater thanuge: unsigned greater or equalult: unsigned less thanule: unsigned less or equalsgt: signed greater thansge: signed greater or equalslt: signed less thansle: signed less or equalThe remaining two arguments must be integer or -pointer or integer vector typed. They -must also be identical types.
-The ‘icmp’ compares op1 and op2 according to the condition
-code given as cond. The comparison performed always yields either an
-i1 or vector of i1 result, as follows:
eq: yields true if the operands are equal, false
-otherwise. No sign interpretation is necessary or performed.ne: yields true if the operands are unequal, false
-otherwise. No sign interpretation is necessary or performed.ugt: interprets the operands as unsigned values and yields
-true if op1 is greater than op2.uge: interprets the operands as unsigned values and yields
-true if op1 is greater than or equal to op2.ult: interprets the operands as unsigned values and yields
-true if op1 is less than op2.ule: interprets the operands as unsigned values and yields
-true if op1 is less than or equal to op2.sgt: interprets the operands as signed values and yields true
-if op1 is greater than op2.sge: interprets the operands as signed values and yields true
-if op1 is greater than or equal to op2.slt: interprets the operands as signed values and yields true
-if op1 is less than op2.sle: interprets the operands as signed values and yields true
-if op1 is less than or equal to op2.If the operands are pointer typed, the pointer values -are compared as if they were integers.
-If the operands are integer vectors, then they are compared element by
-element. The result is an i1 vector with the same number of elements
-as the values being compared. Otherwise, the result is an i1.
<result> = icmp eq i32 4, 5 ; yields: result=false
-<result> = icmp ne float* %X, %X ; yields: result=false
-<result> = icmp ult i16 4, 5 ; yields: result=true
-<result> = icmp sgt i16 4, 5 ; yields: result=false
-<result> = icmp ule i16 -4, 5 ; yields: result=false
-<result> = icmp sge i16 4, 5 ; yields: result=false
-fcmp’ Instruction¶<result> = fcmp [fast-math flags]* <cond> <ty> <op1>, <op2> ; yields i1 or <N x i1>:result
-The ‘fcmp’ instruction returns a boolean value or vector of boolean
-values based on comparison of its operands.
If the operands are floating-point scalars, then the result type is a -boolean (i1).
-If the operands are floating-point vectors, then the result type is a -vector of boolean with the same number of elements as the operands being -compared.
-The ‘fcmp’ instruction takes three operands. The first operand is
-the condition code indicating the kind of comparison to perform. It is
-not a value, just a keyword. The possible condition codes are:
false: no comparison, always returns falseoeq: ordered and equalogt: ordered and greater thanoge: ordered and greater than or equalolt: ordered and less thanole: ordered and less than or equalone: ordered and not equalord: ordered (no nans)ueq: unordered or equalugt: unordered or greater thanuge: unordered or greater than or equalult: unordered or less thanule: unordered or less than or equalune: unordered or not equaluno: unordered (either nans)true: no comparison, always returns trueOrdered means that neither operand is a QNAN while unordered means -that either operand may be a QNAN.
-Each of the op1 and op2 arguments must be of either a floating-point type or a vector of floating-point type.
-They must have identical types.
The ‘fcmp’ instruction compares op1 and op2 according to the
-condition code given as cond. If the operands are vectors, then the
-vectors are compared element by element. Each comparison performed
-always yields an i1 result, as follows:
false: always yields false, regardless of operands.oeq: yields true if both operands are not a QNAN and op1
-is equal to op2.ogt: yields true if both operands are not a QNAN and op1
-is greater than op2.oge: yields true if both operands are not a QNAN and op1
-is greater than or equal to op2.olt: yields true if both operands are not a QNAN and op1
-is less than op2.ole: yields true if both operands are not a QNAN and op1
-is less than or equal to op2.one: yields true if both operands are not a QNAN and op1
-is not equal to op2.ord: yields true if both operands are not a QNAN.ueq: yields true if either operand is a QNAN or op1 is
-equal to op2.ugt: yields true if either operand is a QNAN or op1 is
-greater than op2.uge: yields true if either operand is a QNAN or op1 is
-greater than or equal to op2.ult: yields true if either operand is a QNAN or op1 is
-less than op2.ule: yields true if either operand is a QNAN or op1 is
-less than or equal to op2.une: yields true if either operand is a QNAN or op1 is
-not equal to op2.uno: yields true if either operand is a QNAN.true: always yields true, regardless of operands.The fcmp instruction can also optionally take any number of
-fast-math flags, which are optimization hints to enable
-otherwise unsafe floating-point optimizations.
Any set of fast-math flags are legal on an fcmp instruction, but the
-only flags that have any effect on its semantics are those that allow
-assumptions to be made about the values of input arguments; namely
-nnan, ninf, and reassoc. See Fast-Math Flags for more information.
<result> = fcmp oeq float 4.0, 5.0 ; yields: result=false
-<result> = fcmp one float 4.0, 5.0 ; yields: result=true
-<result> = fcmp olt float 4.0, 5.0 ; yields: result=true
-<result> = fcmp ueq double 1.0, 2.0 ; yields: result=false
-phi’ Instruction¶The ‘phi’ instruction is used to implement the φ node in the SSA
-graph representing the function.
The type of the incoming values is specified with the first type field.
-After this, the ‘phi’ instruction takes a list of pairs as
-arguments, with one pair for each predecessor basic block of the current
-block. Only values of first class type may be used as
-the value arguments to the PHI node. Only labels may be used as the
-label arguments.
There must be no non-phi instructions between the start of a basic block -and the PHI instructions: i.e. PHI instructions must be first in a basic -block.
-For the purposes of the SSA form, the use of each incoming value is
-deemed to occur on the edge from the corresponding predecessor block to
-the current block (but after any definition of an ‘invoke’
-instruction’s return value on the same edge).
The optional fast-math-flags marker indicates that the phi has one
-or more fast-math-flags. These are optimization hints
-to enable otherwise unsafe floating-point optimizations. Fast-math-flags
-are only valid for phis that return a floating-point scalar or vector
-type, or an array (nested to any depth) of floating-point scalar or vector
-types.
At runtime, the ‘phi’ instruction logically takes on the value
-specified by the pair corresponding to the predecessor basic block that
-executed just prior to the current block.
Loop: ; Infinite loop that counts from 0 on up...
- %indvar = phi i32 [ 0, %LoopHeader ], [ %nextindvar, %Loop ]
- %nextindvar = add i32 %indvar, 1
- br label %Loop
-select’ Instruction¶<result> = select [fast-math flags] selty <cond>, <ty> <val1>, <ty> <val2> ; yields ty
-
-selty is either i1 or {<N x i1>}
-The ‘select’ instruction is used to choose one value based on a
-condition, without IR-level branching.
The ‘select’ instruction requires an ‘i1’ value or a vector of ‘i1’
-values indicating the condition, and two values of the same first
-class type.
fast-math flags marker indicates that the select has one or more
-fast-math flags. These are optimization hints to enable
-otherwise unsafe floating-point optimizations. Fast-math flags are only valid
-for selects that return a floating-point scalar or vector type, or an array
-(nested to any depth) of floating-point scalar or vector types.If the condition is an i1 and it evaluates to 1, the instruction returns -the first value argument; otherwise, it returns the second value -argument.
-If the condition is a vector of i1, then the value arguments must be -vectors of the same size, and the selection is done element by element.
-If the condition is an i1 and the value arguments are vectors of the -same size, then an entire vector is selected.
-freeze’ Instruction¶The ‘freeze’ instruction takes a single argument.
If the argument is undef or poison, ‘freeze’ returns an
-arbitrary, but fixed, value of type ‘ty’.
-Otherwise, this instruction is a no-op and returns the input argument.
-All uses of a value returned by the same ‘freeze’ instruction are
-guaranteed to always observe the same value, while different ‘freeze’
-instructions may yield different values.
While undef and poison pointers can be frozen, the result is a
-non-dereferenceable pointer. See the
-Pointer Aliasing Rules section for more information.
-If an aggregate value or vector is frozen, the operand is frozen element-wise.
-The padding of an aggregate isn’t considered, since it isn’t visible
-without storing it into memory and loading it with a different type.
%w = i32 undef
-%x = freeze i32 %w
-%y = add i32 %w, %w ; undef
-%z = add i32 %x, %x ; even number because all uses of %x observe
- ; the same value
-%x2 = freeze i32 %w
-%cmp = icmp eq i32 %x, %x2 ; can be true or false
-
-; example with vectors
-%v = <2 x i32> <i32 undef, i32 poison>
-%a = extractelement <2 x i32> %v, i32 0 ; undef
-%b = extractelement <2 x i32> %v, i32 1 ; poison
-%add = add i32 %a, %a ; undef
-
-%v.fr = freeze <2 x i32> %v ; element-wise freeze
-%d = extractelement <2 x i32> %v.fr, i32 0 ; not undef
-%add.f = add i32 %d, %d ; even number
-
-; branching on frozen value
-%poison = add nsw i1 %k, undef ; poison
-%c = freeze i1 %poison
-br i1 %c, label %foo, label %bar ; non-deterministic branch to %foo or %bar
-call’ Instruction¶<result> = [tail | musttail | notail ] call [fast-math flags] [cconv] [ret attrs] [addrspace(<num>)]
- <ty>|<fnty> <fnptrval>(<function args>) [fn attrs] [ operand bundles ]
-This instruction requires several arguments:
-The optional tail and musttail markers indicate that the optimizers
-should perform tail call optimization. The tail marker is a hint that
-can be ignored. The musttail marker
-means that the call must be tail call optimized in order for the program to
-be correct. The musttail marker provides these guarantees:
"thunk" attribute
-and the caller and callee both have varargs, then any unprototyped
-arguments in register or memory are forwarded to the callee. Similarly,
-the return value of the callee is returned to the caller’s caller, even
-if a void return type is in use.Both markers imply that the callee does not access allocas from the caller.
-The tail marker additionally implies that the callee does not access
-varargs from the caller. Calls marked musttail must obey the following
-additional rules:
--In addition, if the calling convention is not swifttailcc or tailcc:
----
-- All ABI-impacting function attributes, such as sret, byval, inreg, -returned, and inalloca, must match.
-- The caller and callee prototypes must match. Pointer types of parameters -or return types may differ in pointee type, but not in address space.
-On the other hand, if the calling convention is swifttailcc or tailcc:
----
-- Only these ABI-impacting attributes are allowed: sret, byval, -swiftself, and swiftasync.
-- Prototypes are not required to match.
-Tail call optimization for calls marked
-tailis guaranteed to occur if -the following conditions are met:-
-- Caller and callee both have the calling convention
-fastccortailcc.- The call is in tail position (ret immediately follows call and ret -uses value of call or is void).
-- Option
--tailcalloptis enabled, -llvm::GuaranteedTailCallOptistrue, or the calling convention -istailcc- Platform-specific constraints are -met.
-
notail marker indicates that the optimizers should not add
-tail or musttail markers to the call. It is used to prevent tail
-call optimization from being performed on the call.fast-math flags marker indicates that the call has one or more
-fast-math flags, which are optimization hints to enable
-otherwise unsafe floating-point optimizations. Fast-math flags are only valid
-for calls that return a floating-point scalar or vector type, or an array
-(nested to any depth) of floating-point scalar or vector types.zeroext’, ‘signext’, and ‘inreg’ attributes
-are valid here.ty’: the type of the call instruction itself which is also the
-type of the return value. Functions that return no value are marked
-void.fnty’: shall be the signature of the function being called. The
-argument types must match the types implied by this signature. This
-type can be omitted if the function is not varargs.fnptrval’: An LLVM value containing a pointer to a function to
-be called. In most cases, this is a direct function call, but
-indirect calls are just as possible, calling an arbitrary pointer
-to function value.function args’: argument list whose types match the function
-signature argument types and parameter attributes. All arguments must
-be of first class type. If the function signature
-indicates the function accepts a variable number of arguments, the
-extra arguments can be specified.The ‘call’ instruction is used to cause control flow to transfer to
-a specified function, with its incoming arguments bound to the specified
-values. Upon a ‘ret’ instruction in the called function, control
-flow continues with the instruction after the function call, and the
-return value of the function is bound to the result argument.
%retval = call i32 @test(i32 %argc)
-call i32 (i8*, ...)* @printf(i8* %msg, i32 12, i8 42) ; yields i32
-%X = tail call i32 @foo() ; yields i32
-%Y = tail call fastcc i32 @foo() ; yields i32
-call void %foo(i8 signext 97)
-
-%struct.A = type { i32, i8 }
-%r = call %struct.A @foo() ; yields { i32, i8 }
-%gr = extractvalue %struct.A %r, 0 ; yields i32
-%gr1 = extractvalue %struct.A %r, 1 ; yields i8
-%Z = call void @foo() noreturn ; indicates that %foo never returns normally
-%ZZ = call zeroext i32 @bar() ; Return value is %zero extended
-LLVM treats calls to some functions with names and arguments that match -the standard C99 library as being the C99 library functions, and may -perform optimizations or generate code for them under that assumption. -This is something we’d like to change in the future to provide better -support for freestanding environments and non-C-based languages.
-va_arg’ Instruction¶The ‘va_arg’ instruction is used to access arguments passed through
-the “variable argument” area of a function call. It is used to implement
-the va_arg macro in C.
This instruction takes a va_list* value and the type of the
-argument. It returns a value of the specified argument type and
-increments the va_list to point to the next argument. The actual
-type of va_list is target specific.
The ‘va_arg’ instruction loads an argument of the specified type
-from the specified va_list and causes the va_list to point to
-the next argument. For more information, see the variable argument
-handling Intrinsic Functions.
It is legal for this instruction to be called in a function which does
-not take a variable number of arguments, for example, the vfprintf
-function.
va_arg is an LLVM instruction instead of an intrinsic
-function because it takes a type as an argument.
See the variable argument processing section.
-Note that the code generator does not yet fully support va_arg on many -targets. Also, it does not currently support va_arg with aggregate -types on any target.
-landingpad’ Instruction¶<resultval> = landingpad <resultty> <clause>+
-<resultval> = landingpad <resultty> cleanup <clause>*
-
-<clause> := catch <type> <value>
-<clause> := filter <array constant type> <array constant>
-The ‘landingpad’ instruction is used by LLVM’s exception handling
-system to specify that a basic block
-is a landing pad — one where the exception lands, and corresponds to the
-code found in the catch portion of a try/catch sequence. It
-defines values supplied by the personality function upon
-re-entry to the function. The resultval has the type resultty.
The optional
-cleanup flag indicates that the landing pad block is a cleanup.
A clause begins with the clause type — catch or filter — and
-contains the global variable representing the “type” that may be caught
-or filtered respectively. Unlike the catch clause, the filter
-clause takes an array constant as its argument. Use
-“[0 x i8**] undef” for a filter which cannot throw. The
-‘landingpad’ instruction must contain at least one clause or
-the cleanup flag.
The ‘landingpad’ instruction defines the values which are set by the
-personality function upon re-entry to the function, and
-therefore the “result type” of the landingpad instruction. As with
-calling conventions, how the personality function results are
-represented in LLVM IR is target specific.
The clauses are applied in order from top to bottom. If two
-landingpad instructions are merged together through inlining, the
-clauses from the calling function are appended to the list of clauses.
-When the call stack is being unwound due to an exception being thrown,
-the exception is compared against each clause in turn. If it doesn’t
-match any of the clauses, and the cleanup flag is not set, then
-unwinding continues further up the call stack.
The landingpad instruction has several restrictions:
invoke’ instruction.landingpad’ instruction as its
-first non-PHI instruction.landingpad’ instruction within the landing
-pad block.landingpad’ instruction.;; A landing pad which can catch an integer.
-%res = landingpad { i8*, i32 }
- catch i8** @_ZTIi
-;; A landing pad that is a cleanup.
-%res = landingpad { i8*, i32 }
- cleanup
-;; A landing pad which can catch an integer and can only throw a double.
-%res = landingpad { i8*, i32 }
- catch i8** @_ZTIi
- filter [1 x i8**] [i8** @_ZTId]
-catchpad’ Instruction¶The ‘catchpad’ instruction is used by LLVM’s exception handling
-system to specify that a basic block
-begins a catch handler — one where a personality routine attempts to transfer
-control to catch an exception.
The catchswitch operand must always be a token produced by a
-catchswitch instruction in a predecessor block. This
-ensures that each catchpad has exactly one predecessor block, and it always
-terminates in a catchswitch.
The args correspond to whatever information the personality routine
-requires to know if this is an appropriate handler for the exception. Control
-will transfer to the catchpad if this is the first appropriate handler for
-the exception.
The resultval has the type token and is used to match the
-catchpad to corresponding catchrets and other nested EH
-pads.
When the call stack is being unwound due to an exception being thrown, the
-exception is compared against the args. If it doesn’t match, control will
-not reach the catchpad instruction. The representation of args is
-entirely target and personality function-specific.
Like the landingpad instruction, the catchpad
-instruction must be the first non-phi of its parent basic block.
The meaning of the tokens produced and consumed by catchpad and other “pad”
-instructions is described in the
-Windows exception handling documentation.
When a catchpad has been “entered” but not yet “exited” (as
-described in the EH documentation),
-it is undefined behavior to execute a call or invoke
-that does not carry an appropriate “funclet” bundle.
dispatch:
- %cs = catchswitch within none [label %handler0] unwind to caller
- ;; A catch block which can catch an integer.
-handler0:
- %tok = catchpad within %cs [i8** @_ZTIi]
-cleanuppad’ Instruction¶The ‘cleanuppad’ instruction is used by LLVM’s exception handling
-system to specify that a basic block
-is a cleanup block — one where a personality routine attempts to
-transfer control to run cleanup actions.
-The args correspond to whatever additional
-information the personality function requires to
-execute the cleanup.
-The resultval has the type token and is used to
-match the cleanuppad to corresponding cleanuprets.
-The parent argument is the token of the funclet that contains the
-cleanuppad instruction. If the cleanuppad is not inside a funclet,
-this operand may be the token none.
The instruction takes a list of arbitrary values which are interpreted -by the personality function.
-When the call stack is being unwound due to an exception being thrown,
-the personality function transfers control to the
-cleanuppad with the aid of the personality-specific arguments.
-As with calling conventions, how the personality function results are
-represented in LLVM IR is target specific.
The cleanuppad instruction has several restrictions:
cleanuppad’ instruction as its
-first non-PHI instruction.cleanuppad’ instruction within the
-cleanup block.cleanuppad’ instruction.When a cleanuppad has been “entered” but not yet “exited” (as
-described in the EH documentation),
-it is undefined behavior to execute a call or invoke
-that does not carry an appropriate “funclet” bundle.
ret’ Instruction¶ret <type> <value> ; Return a value from a non-void functionret void ; Return from void functionThe ‘ret’ instruction is used to return control flow (and optionallya value) from a function back to the caller.
There are two forms of the ‘ret’ instruction: one that returns a value and then causes control flow, and one that just causes control flow to occur.
The ‘ret’ instruction optionally accepts a single argument, the return value. The type of the return value must be a ‘first class’ type.
A function is not well formed if it has a non-void return type and contains a ‘ret’ instruction with no return value or a return value with a type that does not match its type, or if it has a void return type and contains a ‘ret’ instruction with a return value.
When the ‘ret’ instruction is executed, control flow returns back to the calling function’s context. If the caller is a “call” instruction, execution continues at the instruction after the call. If the caller was an “invoke” instruction, execution continues at the beginning of the “normal” destination block. If the instruction returns a value, that value shall set the call or invoke instruction’s return value.
ret i32 5 ; Return an integer value of 5ret void ; Return from a void functionret { i32, i8 } { i32 4, i8 2 } ; Return a struct of values 4 and 2br’ Instruction¶br i1 <cond>, label <iftrue>, label <iffalse>br label <dest> ; Unconditional branchThe ‘br’ instruction is used to cause control flow to transfer to adifferent basic block in the current function. There are two forms ofthis instruction, corresponding to a conditional branch and anunconditional branch.
The conditional branch form of the ‘br’ instruction takes a single ‘i1’ value and two ‘label’ values. The unconditional form of the ‘br’ instruction takes a single ‘label’ value as a target.
Upon execution of a conditional ‘br’ instruction, the ‘i1’ argument is evaluated. If the value is true, control flows to the ‘iftrue’ label argument. If “cond” is false, control flows to the ‘iffalse’ label argument. If ‘cond’ is poison or undef, this instruction has undefined behavior.
Test: %cond = icmp eq i32 %a, %b br i1 %cond, label %IfEqual, label %IfUnequalIfEqual: ret i32 1IfUnequal: ret i32 0switch’ Instruction¶switch <intty> <value>, label <defaultdest> [ <intty> <val>, label <dest> ... ]The ‘switch’ instruction is used to transfer control flow to one ofseveral different places. It is a generalization of the ‘br’instruction, allowing a branch to occur to one of many possibledestinations.
The ‘switch’ instruction uses three parameters: an integer comparison value ‘value’, a default ‘label’ destination, and an array of pairs of comparison value constants and ‘label’s. The table is not allowed to contain duplicate constant entries.
The switch instruction specifies a table of values and destinations. When the ‘switch’ instruction is executed, this table is searched for the given value. If the value is found, control flow is transferred to the corresponding destination; otherwise, control flow is transferred to the default destination. If ‘value’ is poison or undef, this instruction has undefined behavior.
Depending on properties of the target machine and the particular switch instruction, this instruction may be code generated in different ways. For example, it could be generated as a series of chained conditional branches or with a lookup table.
; Emulate a conditional br instruction%Val = zext i1 %value to i32switch i32 %Val, label %truedest [ i32 0, label %falsedest ]; Emulate an unconditional br instructionswitch i32 0, label %dest [ ]; Implement a jump table:switch i32 %val, label %otherwise [ i32 0, label %onzero i32 1, label %onone i32 2, label %ontwo ]indirectbr’ Instruction¶indirectbr ptr <address>, [ label <dest1>, label <dest2>, ... ]The ‘indirectbr’ instruction implements an indirect branch to alabel within the current function, whose address is specified by“address”. Address must be derived from ablockaddress constant.
The ‘address’ argument is the address of the label to jump to. The rest of the arguments indicate the full set of possible destinations that the address may point to. Blocks are allowed to occur multiple times in the destination list, though this isn’t particularly useful.
This destination list is required so that dataflow analysis has an accurate understanding of the CFG.
Control transfers to the block specified in the address argument. All possible destination blocks must be listed in the label list, otherwise this instruction has undefined behavior. This implies that jumps to labels defined in other functions have undefined behavior as well. If ‘address’ is poison or undef, this instruction has undefined behavior.
This is typically implemented with a jump through a register.
indirectbr ptr %Addr, [ label %bb1, label %bb2, label %bb3 ]invoke’ Instruction¶<result> = invoke [cconv] [ret attrs] [addrspace(<num>)] <ty>|<fnty> <fnptrval>(<function args>) [fn attrs] [operand bundles] to label <normal label> unwind label <exception label>The ‘invoke’ instruction causes control to transfer to a specifiedfunction, with the possibility of control flow transfer to either the‘normal’ label or the ‘exception’ label. If the callee functionreturns with the “ret” instruction, control flow will return to the“normal” label. If the callee (or any indirect callees) returns via the“resume” instruction or other exception handlingmechanism, control is interrupted and continued at the dynamicallynearest “exception” label.
The ‘exception’ label is a landingpad for the exception. As such, the ‘exception’ label is required to have the “landingpad” instruction, which contains the information about the behavior of the program after unwinding happens, as its first non-PHI instruction. The restrictions on the “landingpad” instruction tightly couple it to the “invoke” instruction, so that the important information contained within the “landingpad” instruction can’t be lost through normal code motion.
This instruction requires several arguments:
zeroext’, ‘signext’, and ‘inreg’ attributesare valid here.ty’: the type of the call instruction itself which is also thetype of the return value. Functions that return no value are markedvoid.fnty’: shall be the signature of the function being invoked. Theargument types must match the types implied by this signature. Thistype can be omitted if the function is not varargs.fnptrval’: An LLVM value containing a pointer to a function tobe invoked. In most cases, this is a direct function invocation, butindirect invoke’s are just as possible, calling an arbitrary pointerto function value.function args’: argument list whose types match the functionsignature argument types and parameter attributes. All arguments mustbe of first class type. If the function signatureindicates the function accepts a variable number of arguments, theextra arguments can be specified.normal label’: the label reached when the called functionexecutes a ‘ret’ instruction.exception label’: the label reached when a callee returns viathe resume instruction or other exception handlingmechanism.This instruction is designed to operate as a standard ‘call’instruction in most regards. The primary difference is that itestablishes an association with a label, which is used by the runtimelibrary to unwind the stack.
This instruction is used in languages with destructors to ensure that proper cleanup is performed in the case of either a longjmp or a thrown exception. Additionally, this is important for implementation of ‘catch’ clauses in high-level languages that support them.
For the purposes of the SSA form, the definition of the value returned by the ‘invoke’ instruction is deemed to occur on the edge from the current block to the “normal” label. If the callee unwinds then no return value is available.
%retval = invoke i32 @Test(i32 15) to label %Continue unwind label %TestCleanup ; i32:retval set%retval = invoke coldcc i32 %Testfnptr(i32 15) to label %Continue unwind label %TestCleanup ; i32:retval setcallbr’ Instruction¶<result> = callbr [cconv] [ret attrs] [addrspace(<num>)] <ty>|<fnty> <fnptrval>(<function args>) [fn attrs] [operand bundles] to label <fallthrough label> [indirect labels]The ‘callbr’ instruction causes control to transfer to a specifiedfunction, with the possibility of control flow transfer to either the‘fallthrough’ label or one of the ‘indirect’ labels.
This instruction should only be used to implement the “goto” feature of gcc style inline assembly. Any other usage is an error in the IR verifier.
This instruction requires several arguments:
zeroext’, ‘signext’, and ‘inreg’ attributesare valid here.ty’: the type of the call instruction itself which is also thetype of the return value. Functions that return no value are markedvoid.fnty’: shall be the signature of the function being called. Theargument types must match the types implied by this signature. Thistype can be omitted if the function is not varargs.fnptrval’: An LLVM value containing a pointer to a function tobe called. In most cases, this is a direct function call, butother callbr’s are just as possible, calling an arbitrary pointerto function value.function args’: argument list whose types match the functionsignature argument types and parameter attributes. All arguments mustbe of first class type. If the function signatureindicates the function accepts a variable number of arguments, theextra arguments can be specified.fallthrough label’: the label reached when the inline assembly’sexecution exits the bottom.indirect labels’: the labels reached when a callee transfers controlto a location other than the ‘fallthrough label’. Label constraintsrefer to these destinations.This instruction is designed to operate as a standard ‘call’instruction in most regards. The primary difference is that itestablishes an association with additional labels to define where controlflow goes after the call.
The output values of a ‘callbr’ instruction are available only to the ‘fallthrough’ block, not to any ‘indirect’ block(s).
The only use of this today is to implement the “goto” feature of gcc inline assembly where additional labels can be provided as locations for the inline assembly to jump to.
; "asm goto" without output constraints.callbr void asm "", "r,!i"(i32 %x) to label %fallthrough [label %indirect]; "asm goto" with output constraints.<result> = callbr i32 asm "", "=r,r,!i"(i32 %x) to label %fallthrough [label %indirect]resume’ Instruction¶resume <type> <value>The ‘resume’ instruction is a terminator instruction that has nosuccessors.
The ‘resume’ instruction requires one argument, which must have the same type as the result of any ‘landingpad’ instruction in the same function.
The ‘resume’ instruction resumes propagation of an existing (in-flight) exception whose unwinding was interrupted with a landingpad instruction.
resume { ptr, i32 } %exncatchswitch’ Instruction¶<resultval> = catchswitch within <parent> [ label <handler1>, label <handler2>, ... ] unwind to caller<resultval> = catchswitch within <parent> [ label <handler1>, label <handler2>, ... ] unwind label <default>The ‘catchswitch’ instruction is used by LLVM’s exception handling system to describe the set of possible catch handlersthat may be executed by the EH personality routine.
The parent argument is the token of the funclet that contains the catchswitch instruction. If the catchswitch is not inside a funclet, this operand may be the token none.
The default argument is the label of another basic block beginning with either a cleanuppad or catchswitch instruction. This unwind destination must be a legal target with respect to the parent links, as described in the exception handling documentation.
The handlers are a nonempty list of successor blocks that each begin with a catchpad instruction.
Executing this instruction transfers control to one of the successors in handlers, if appropriate, or continues to unwind via the unwind label if present.
The catchswitch is both a terminator and a “pad” instruction, meaning that it must be both the first non-phi instruction and last instruction in the basic block. Therefore, it must be the only non-phi instruction in the block.
dispatch1: %cs1 = catchswitch within none [label %handler0, label %handler1] unwind to callerdispatch2: %cs2 = catchswitch within %parenthandler [label %handler0] unwind label %cleanupcatchret’ Instruction¶catchret from <token> to label <normal>The ‘catchret’ instruction is a terminator instruction that has asingle successor.
The first argument to a ‘catchret’ indicates which catchpad itexits. It must be a catchpad.The second argument to a ‘catchret’ specifies where control willtransfer to next.
The ‘catchret’ instruction ends an existing (in-flight) exception whoseunwinding was interrupted with a catchpad instruction. Thepersonality function gets a chance to execute arbitrarycode to, for example, destroy the active exception. Control then transfers tonormal.
The token argument must be a token produced by a catchpad instruction.If the specified catchpad is not the most-recently-entered not-yet-exitedfunclet pad (as described in the EH documentation),the catchret’s behavior is undefined.
catchret from %catch to label %continuecleanupret’ Instruction¶cleanupret from <value> unwind label <continue>cleanupret from <value> unwind to callerThe ‘cleanupret’ instruction is a terminator instruction that hasan optional successor.
The ‘cleanupret’ instruction requires one argument, which indicateswhich cleanuppad it exits, and must be a cleanuppad.If the specified cleanuppad is not the most-recently-entered not-yet-exitedfunclet pad (as described in the EH documentation),the cleanupret’s behavior is undefined.
The ‘cleanupret’ instruction also has an optional successor, continue,which must be the label of another basic block beginning with either acleanuppad or catchswitch instruction. This unwind destination mustbe a legal target with respect to the parent links, as described in theexception handling documentation.
The ‘cleanupret’ instruction indicates to thepersonality function that onecleanuppad it transferred control to has ended.It transfers control to continue or unwinds out of the function.
cleanupret from %cleanup unwind to callercleanupret from %cleanup unwind label %continueunreachable’ Instruction¶unreachableThe ‘unreachable’ instruction has no defined semantics. Thisinstruction is used to inform the optimizer that a particular portion ofthe code is not reachable. This can be used to indicate that the codeafter a no-return function cannot be reached, and other facts.
The ‘unreachable’ instruction has no defined semantics.
fneg’ Instruction¶<result> = fneg [fast-math flags]* <ty> <op1> ; yields ty:resultThe ‘fneg’ instruction returns the negation of its operand.
The argument to the ‘fneg’ instruction must be afloating-point or vector offloating-point values.
The value produced is a copy of the operand with its sign bit flipped.This instruction can also take any number of fast-mathflags, which are optimization hints to enable otherwiseunsafe floating-point optimizations:
<result> = fneg float %val ; yields float:result = -%varadd’ Instruction¶<result> = add <ty> <op1>, <op2> ; yields ty:result<result> = add nuw <ty> <op1>, <op2> ; yields ty:result<result> = add nsw <ty> <op1>, <op2> ; yields ty:result<result> = add nuw nsw <ty> <op1>, <op2> ; yields ty:resultThe ‘add’ instruction returns the sum of its two operands.
The two arguments to the ‘add’ instruction must beinteger or vector of integer values. Botharguments must have identical types.
The value produced is the integer sum of the two operands.
If the sum has unsigned overflow, the result returned is the mathematical result modulo 2^n, where n is the bit width of the result.
Because LLVM integers use a two’s complement representation, thisinstruction is appropriate for both signed and unsigned integers.
nuw and nsw stand for “No Unsigned Wrap” and “No Signed Wrap”, respectively. If the nuw and/or nsw keywords are present, the result value of the add is a poison value if unsigned and/or signed overflow, respectively, occurs.
<result> = add i32 4, %var ; yields i32:result = 4 + %varfadd’ Instruction¶<result> = fadd [fast-math flags]* <ty> <op1>, <op2> ; yields ty:resultThe ‘fadd’ instruction returns the sum of its two operands.
The two arguments to the ‘fadd’ instruction must befloating-point or vector offloating-point values. Both arguments must have identical types.
The value produced is the floating-point sum of the two operands.This instruction is assumed to execute in the default floating-pointenvironment.This instruction can also take any number of fast-mathflags, which are optimization hints to enable otherwiseunsafe floating-point optimizations:
<result> = fadd float 4.0, %var ; yields float:result = 4.0 + %varsub’ Instruction¶<result> = sub <ty> <op1>, <op2> ; yields ty:result<result> = sub nuw <ty> <op1>, <op2> ; yields ty:result<result> = sub nsw <ty> <op1>, <op2> ; yields ty:result<result> = sub nuw nsw <ty> <op1>, <op2> ; yields ty:resultThe ‘sub’ instruction returns the difference of its two operands.
Note that the ‘sub’ instruction is used to represent the ‘neg’instruction present in most other intermediate representations.
The two arguments to the ‘sub’ instruction must beinteger or vector of integer values. Botharguments must have identical types.
The value produced is the integer difference of the two operands.
If the difference has unsigned overflow, the result returned is the mathematical result modulo 2^n, where n is the bit width of the result.
Because LLVM integers use a two’s complement representation, thisinstruction is appropriate for both signed and unsigned integers.
nuw and nsw stand for “No Unsigned Wrap” and “No Signed Wrap”,respectively. If the nuw and/or nsw keywords are present, theresult value of the sub is a poison value ifunsigned and/or signed overflow, respectively, occurs.
<result> = sub i32 4, %var ; yields i32:result = 4 - %var<result> = sub i32 0, %val ; yields i32:result = -%varfsub’ Instruction¶<result> = fsub [fast-math flags]* <ty> <op1>, <op2> ; yields ty:resultThe ‘fsub’ instruction returns the difference of its two operands.
The two arguments to the ‘fsub’ instruction must befloating-point or vector offloating-point values. Both arguments must have identical types.
The value produced is the floating-point difference of the two operands.This instruction is assumed to execute in the default floating-pointenvironment.This instruction can also take any number of fast-mathflags, which are optimization hints to enable otherwiseunsafe floating-point optimizations:
<result> = fsub float 4.0, %var ; yields float:result = 4.0 - %var<result> = fsub float -0.0, %val ; yields float:result = -%varmul’ Instruction¶<result> = mul <ty> <op1>, <op2> ; yields ty:result<result> = mul nuw <ty> <op1>, <op2> ; yields ty:result<result> = mul nsw <ty> <op1>, <op2> ; yields ty:result<result> = mul nuw nsw <ty> <op1>, <op2> ; yields ty:resultThe ‘mul’ instruction returns the product of its two operands.
The two arguments to the ‘mul’ instruction must beinteger or vector of integer values. Botharguments must have identical types.
The value produced is the integer product of the two operands.
If the result of the multiplication has unsigned overflow, the result returned is the mathematical result modulo 2^n, where n is the bit width of the result.
Because LLVM integers use a two’s complement representation, and theresult is the same width as the operands, this instruction returns thecorrect result for both signed and unsigned integers. If a full product(e.g. i32 * i32 -> i64) is needed, the operands should besign-extended or zero-extended as appropriate to the width of the fullproduct.
nuw and nsw stand for “No Unsigned Wrap” and “No Signed Wrap”,respectively. If the nuw and/or nsw keywords are present, theresult value of the mul is a poison value ifunsigned and/or signed overflow, respectively, occurs.
<result> = mul i32 4, %var ; yields i32:result = 4 * %varfmul’ Instruction¶<result> = fmul [fast-math flags]* <ty> <op1>, <op2> ; yields ty:resultThe ‘fmul’ instruction returns the product of its two operands.
The two arguments to the ‘fmul’ instruction must befloating-point or vector offloating-point values. Both arguments must have identical types.
The value produced is the floating-point product of the two operands.This instruction is assumed to execute in the default floating-pointenvironment.This instruction can also take any number of fast-mathflags, which are optimization hints to enable otherwiseunsafe floating-point optimizations:
<result> = fmul float 4.0, %var ; yields float:result = 4.0 * %varudiv’ Instruction¶<result> = udiv <ty> <op1>, <op2> ; yields ty:result<result> = udiv exact <ty> <op1>, <op2> ; yields ty:resultThe ‘udiv’ instruction returns the quotient of its two operands.
The two arguments to the ‘udiv’ instruction must beinteger or vector of integer values. Botharguments must have identical types.
The value produced is the unsigned integer quotient of the two operands.
Note that unsigned integer division and signed integer division aredistinct operations; for signed integer division, use ‘sdiv’.
Division by zero is undefined behavior. For vectors, if any elementof the divisor is zero, the operation has undefined behavior.
If the exact keyword is present, the result value of the udiv isa poison value if %op1 is not a multiple of %op2 (assuch, “((a udiv exact b) mul b) == a”).
<result> = udiv i32 4, %var ; yields i32:result = 4 / %varsdiv’ Instruction¶<result> = sdiv <ty> <op1>, <op2> ; yields ty:result<result> = sdiv exact <ty> <op1>, <op2> ; yields ty:resultThe ‘sdiv’ instruction returns the quotient of its two operands.
The two arguments to the ‘sdiv’ instruction must beinteger or vector of integer values. Botharguments must have identical types.
The value produced is the signed integer quotient of the two operandsrounded towards zero.
Note that signed integer division and unsigned integer division aredistinct operations; for unsigned integer division, use ‘udiv’.
Division by zero is undefined behavior. For vectors, if any elementof the divisor is zero, the operation has undefined behavior.Overflow also leads to undefined behavior; this is a rare case, but canoccur, for example, by doing a 32-bit division of -2147483648 by -1.
If the exact keyword is present, the result value of the sdiv isa poison value if the result would be rounded.
<result> = sdiv i32 4, %var ; yields i32:result = 4 / %varfdiv’ Instruction¶<result> = fdiv [fast-math flags]* <ty> <op1>, <op2> ; yields ty:resultThe ‘fdiv’ instruction returns the quotient of its two operands.
The two arguments to the ‘fdiv’ instruction must befloating-point or vector offloating-point values. Both arguments must have identical types.
The value produced is the floating-point quotient of the two operands.This instruction is assumed to execute in the default floating-pointenvironment.This instruction can also take any number of fast-mathflags, which are optimization hints to enable otherwiseunsafe floating-point optimizations:
<result> = fdiv float 4.0, %var ; yields float:result = 4.0 / %varurem’ Instruction¶<result> = urem <ty> <op1>, <op2> ; yields ty:resultThe ‘urem’ instruction returns the remainder from the unsigneddivision of its two arguments.
The two arguments to the ‘urem’ instruction must beinteger or vector of integer values. Botharguments must have identical types.
This instruction returns the unsigned integer remainder of a division.This instruction always performs an unsigned division to get theremainder.
Note that unsigned integer remainder and signed integer remainder aredistinct operations; for signed integer remainder, use ‘srem’.
Taking the remainder of a division by zero is undefined behavior.For vectors, if any element of the divisor is zero, the operation hasundefined behavior.
<result> = urem i32 4, %var ; yields i32:result = 4 % %varsrem’ Instruction¶<result> = srem <ty> <op1>, <op2> ; yields ty:resultThe ‘srem’ instruction returns the remainder from the signeddivision of its two operands. This instruction can also takevector versions of the values in which case the elementsmust be integers.
The two arguments to the ‘srem’ instruction must beinteger or vector of integer values. Botharguments must have identical types.
This instruction returns the remainder of a division (where the resultis either zero or has the same sign as the dividend, op1), not themodulo operator (where the result is either zero or has the same signas the divisor, op2) of a value. For more information about thedifference, see The MathForum. For atable of how this is implemented in various languages, please seeWikipedia: modulooperation.
Note that signed integer remainder and unsigned integer remainder aredistinct operations; for unsigned integer remainder, use ‘urem’.
Taking the remainder of a division by zero is undefined behavior.For vectors, if any element of the divisor is zero, the operation hasundefined behavior.Overflow also leads to undefined behavior; this is a rare case, but canoccur, for example, by taking the remainder of a 32-bit division of-2147483648 by -1. (The remainder doesn’t actually overflow, but thisrule lets srem be implemented using instructions that return both theresult of the division and the remainder.)
<result> = srem i32 4, %var ; yields i32:result = 4 % %varfrem’ Instruction¶<result> = frem [fast-math flags]* <ty> <op1>, <op2> ; yields ty:resultThe ‘frem’ instruction returns the remainder from the division ofits two operands.
The two arguments to the ‘frem’ instruction must befloating-point or vector offloating-point values. Both arguments must have identical types.
The value produced is the floating-point remainder of the two operands.This is the same output as a libm ‘fmod’ function, but without anypossibility of setting errno. The remainder has the same sign as thedividend.This instruction is assumed to execute in the default floating-pointenvironment.This instruction can also take any number of fast-mathflags, which are optimization hints to enable otherwiseunsafe floating-point optimizations:
<result> = frem float 4.0, %var ; yields float:result = 4.0 % %varshl’ Instruction¶<result> = shl <ty> <op1>, <op2> ; yields ty:result<result> = shl nuw <ty> <op1>, <op2> ; yields ty:result<result> = shl nsw <ty> <op1>, <op2> ; yields ty:result<result> = shl nuw nsw <ty> <op1>, <op2> ; yields ty:resultThe ‘shl’ instruction returns the first operand shifted to the lefta specified number of bits.
Both arguments to the ‘shl’ instruction must be the same integer or vector of integer type. ‘op2’ is treated as an unsigned value.
The value produced is $op1 \cdot 2^{op2} \bmod 2^{n}$, where n is the width of the result. If op2 is (statically or dynamically) equal to or larger than the number of bits in op1, this instruction returns a poison value. If the arguments are vectors, each vector element of op1 is shifted by the corresponding shift amount in op2.
If the nuw keyword is present, then the shift produces a poison value if it shifts out any non-zero bits. If the nsw keyword is present, then the shift produces a poison value if it shifts out any bits that disagree with the resultant sign bit.
<result> = shl i32 4, %var ; yields i32: 4 << %var<result> = shl i32 4, 2 ; yields i32: 16<result> = shl i32 1, 10 ; yields i32: 1024<result> = shl i32 1, 32 ; undefined<result> = shl <2 x i32> < i32 1, i32 1>, < i32 1, i32 2> ; yields: result=<2 x i32> < i32 2, i32 4>lshr’ Instruction¶<result> = lshr <ty> <op1>, <op2> ; yields ty:result<result> = lshr exact <ty> <op1>, <op2> ; yields ty:resultThe ‘lshr’ instruction (logical shift right) returns the firstoperand shifted to the right a specified number of bits with zero fill.
Both arguments to the ‘lshr’ instruction must be the sameinteger or vector of integer type.‘op2’ is treated as an unsigned value.
This instruction always performs a logical shift right operation. The most significant bits of the result will be filled with zero bits after the shift. If op2 is (statically or dynamically) equal to or larger than the number of bits in op1, this instruction returns a poison value. If the arguments are vectors, each vector element of op1 is shifted by the corresponding shift amount in op2.
If the exact keyword is present, the result value of the lshr isa poison value if any of the bits shifted out are non-zero.
<result> = lshr i32 4, 1 ; yields i32:result = 2<result> = lshr i32 4, 2 ; yields i32:result = 1<result> = lshr i8 4, 3 ; yields i8:result = 0<result> = lshr i8 -2, 1 ; yields i8:result = 0x7F<result> = lshr i32 1, 32 ; undefined<result> = lshr <2 x i32> < i32 -2, i32 4>, < i32 1, i32 2> ; yields: result=<2 x i32> < i32 0x7FFFFFFF, i32 1>ashr’ Instruction¶<result> = ashr <ty> <op1>, <op2> ; yields ty:result<result> = ashr exact <ty> <op1>, <op2> ; yields ty:resultThe ‘ashr’ instruction (arithmetic shift right) returns the firstoperand shifted to the right a specified number of bits with signextension.
Both arguments to the ‘ashr’ instruction must be the sameinteger or vector of integer type.‘op2’ is treated as an unsigned value.
This instruction always performs an arithmetic shift right operation. The most significant bits of the result will be filled with the sign bit of op1. If op2 is (statically or dynamically) equal to or larger than the number of bits in op1, this instruction returns a poison value. If the arguments are vectors, each vector element of op1 is shifted by the corresponding shift amount in op2.
If the exact keyword is present, the result value of the ashr isa poison value if any of the bits shifted out are non-zero.
<result> = ashr i32 4, 1 ; yields i32:result = 2<result> = ashr i32 4, 2 ; yields i32:result = 1<result> = ashr i8 4, 3 ; yields i8:result = 0<result> = ashr i8 -2, 1 ; yields i8:result = -1<result> = ashr i32 1, 32 ; undefined<result> = ashr <2 x i32> < i32 -2, i32 4>, < i32 1, i32 3> ; yields: result=<2 x i32> < i32 -1, i32 0>and’ Instruction¶<result> = and <ty> <op1>, <op2> ; yields ty:resultThe ‘and’ instruction returns the bitwise logical and of its twooperands.
The two arguments to the ‘and’ instruction must beinteger or vector of integer values. Botharguments must have identical types.
<result> = and i32 4, %var ; yields i32:result = 4 & %var<result> = and i32 15, 40 ; yields i32:result = 8<result> = and i32 4, 8 ; yields i32:result = 0or’ Instruction¶<result> = or <ty> <op1>, <op2> ; yields ty:resultThe ‘or’ instruction returns the bitwise logical inclusive or of itstwo operands.
The two arguments to the ‘or’ instruction must beinteger or vector of integer values. Botharguments must have identical types.
<result> = or i32 4, %var ; yields i32:result = 4 | %var<result> = or i32 15, 40 ; yields i32:result = 47<result> = or i32 4, 8 ; yields i32:result = 12xor’ Instruction¶<result> = xor <ty> <op1>, <op2> ; yields ty:resultThe ‘xor’ instruction returns the bitwise logical exclusive or ofits two operands. The xor is used to implement the “one’scomplement” operation, which is the “~” operator in C.
The two arguments to the ‘xor’ instruction must beinteger or vector of integer values. Botharguments must have identical types.
<result> = xor i32 4, %var ; yields i32:result = 4 ^ %var<result> = xor i32 15, 40 ; yields i32:result = 39<result> = xor i32 4, 8 ; yields i32:result = 12<result> = xor i32 %V, -1 ; yields i32:result = ~%Vextractelement’ Instruction¶<result> = extractelement <n x <ty>> <val>, <ty2> <idx> ; yields <ty><result> = extractelement <vscale x n x <ty>> <val>, <ty2> <idx> ; yields <ty>The ‘extractelement’ instruction extracts a single scalar elementfrom a vector at a specified index.
The first operand of an ‘extractelement’ instruction is a value ofvector type. The second operand is an index indicatingthe position from which to extract the element. The index may be avariable of any integer type, and will be treated as an unsigned integer.
The result is a scalar of the same type as the element type of val.Its value is the value at position idx of val. If idxexceeds the length of val for a fixed-length vector, the result is apoison value. For a scalable vector, if the valueof idx exceeds the runtime length of the vector, the result is apoison value.
<result> = extractelement <4 x i32> %vec, i32 0 ; yields i32insertelement’ Instruction¶<result> = insertelement <n x <ty>> <val>, <ty> <elt>, <ty2> <idx> ; yields <n x <ty>><result> = insertelement <vscale x n x <ty>> <val>, <ty> <elt>, <ty2> <idx> ; yields <vscale x n x <ty>>The ‘insertelement’ instruction inserts a scalar element into avector at a specified index.
The first operand of an ‘insertelement’ instruction is a value ofvector type. The second operand is a scalar value whosetype must equal the element type of the first operand. The third operandis an index indicating the position at which to insert the value. Theindex may be a variable of any integer type, and will be treated as anunsigned integer.
The result is a vector of the same type as val. Its element valuesare those of val except at position idx, where it gets the valueelt. If idx exceeds the length of val for a fixed-length vector,the result is a poison value. For a scalable vector,if the value of idx exceeds the runtime length of the vector, the resultis a poison value.
<result> = insertelement <4 x i32> %vec, i32 1, i32 0 ; yields <4 x i32>shufflevector’ Instruction¶<result> = shufflevector <n x <ty>> <v1>, <n x <ty>> <v2>, <m x i32> <mask> ; yields <m x <ty>><result> = shufflevector <vscale x n x <ty>> <v1>, <vscale x n x <ty>> v2, <vscale x m x i32> <mask> ; yields <vscale x m x <ty>>The ‘shufflevector’ instruction constructs a permutation of elementsfrom two input vectors, returning a vector with the same element type asthe input and length that is the same as the shuffle mask.
The first two operands of a ‘shufflevector’ instruction are vectorswith the same type. The third argument is a shuffle mask vector constantwhose element type is i32. The mask vector elements must be constantintegers or undef values. The result of the instruction is a vectorwhose length is the same as the shuffle mask and whose element type is thesame as the element type of the first two operands.
The elements of the two input vectors are numbered from left to rightacross both of the vectors. For each element of the result vector, theshuffle mask selects an element from one of the input vectors to copyto the result. Non-negative elements in the mask represent an indexinto the concatenated pair of input vectors.
If the shuffle mask is undefined, the result vector is undefined. Ifthe shuffle mask selects an undefined element from one of the inputvectors, the resulting element is undefined. An undefined elementin the mask vector specifies that the resulting element is undefined.An undefined element in the mask vector prevents a poisoned vectorelement from propagating.
For scalable vectors, the only valid mask values at present arezeroinitializer and undef, since we cannot write all indices asliterals for a vector with a length unknown at compile time.
<result> = shufflevector <4 x i32> %v1, <4 x i32> %v2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> ; yields <4 x i32><result> = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; yields <4 x i32> - Identity shuffle.<result> = shufflevector <8 x i32> %v1, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; yields <4 x i32><result> = shufflevector <4 x i32> %v1, <4 x i32> %v2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 > ; yields <8 x i32>extractvalue’ Instruction¶<result> = extractvalue <aggregate type> <val>, <idx>{, <idx>}*The ‘extractvalue’ instruction extracts the value of a member fieldfrom an aggregate value.
The first operand of an ‘extractvalue’ instruction is a value ofstruct or array type. The other operands areconstant indices to specify which value to extract in a similar manneras indices in a ‘getelementptr’ instruction.
The major differences to getelementptr indexing are:
The result is the value at the position in the aggregate specified bythe index operands.
<result> = extractvalue {i32, float} %agg, 0 ; yields i32insertvalue’ Instruction¶<result> = insertvalue <aggregate type> <val>, <ty> <elt>, <idx>{, <idx>}* ; yields <aggregate type>The first operand of an ‘insertvalue’ instruction is a value ofstruct or array type. The second operand isa first-class value to insert. The following operands are constantindices indicating the position at which to insert the value in asimilar manner as indices in a ‘extractvalue’ instruction. The valueto insert must have the same type as the value identified by theindices.
The result is an aggregate of the same type as val. Its value isthat of val except that the value at the position specified by theindices is that of elt.
%agg1 = insertvalue {i32, float} undef, i32 1, 0 ; yields {i32 1, float undef}%agg2 = insertvalue {i32, float} %agg1, float %val, 1 ; yields {i32 1, float %val}%agg3 = insertvalue {i32, {float}} undef, float %val, 1, 0 ; yields {i32 undef, {float %val}}alloca’ Instruction¶<result> = alloca [inalloca] <type> [, <ty> <NumElements>] [, align <alignment>] [, addrspace(<num>)] ; yields type addrspace(num)*:resultThe ‘alloca’ instruction allocates memory on the stack frame of thecurrently executing function, to be automatically released when thisfunction returns to its caller. If the address space is not explicitlyspecified, the object is allocated in the alloca address space from thedatalayout string.
The ‘alloca’ instruction allocates sizeof(<type>)*NumElementsbytes of memory on the runtime stack, returning a pointer of theappropriate type to the program. If “NumElements” is specified, it isthe number of elements allocated, otherwise “NumElements” is defaultedto be one. If a constant alignment is specified, the value result of theallocation is guaranteed to be aligned to at least that boundary. Thealignment may not be greater than 1 << 32. If not specified, or ifzero, the target can choose to align the allocation on any convenientboundary compatible with the type.
‘type’ may be any sized type.
Memory is allocated; a pointer is returned. The allocated memory is uninitialized, and loading from uninitialized memory produces an undefined value. The operation itself is undefined if there is insufficient stack space for the allocation. ‘alloca’d memory is automatically released when the function returns. The ‘alloca’ instruction is commonly used to represent automatic variables that must have an address available. When the function returns (either with the ret or resume instructions), the memory is reclaimed. Allocating zero bytes is legal, but the returned pointer may not be unique. The order in which memory is allocated (i.e., which way the stack grows) is not specified.
Note that ‘alloca’ outside of the alloca address space from thedatalayout string is meaningful only if thetarget has assigned it a semantics.
If the returned pointer is used by llvm.lifetime.start,the returned object is initially dead.See llvm.lifetime.start andllvm.lifetime.end for the precise semantics oflifetime-manipulating intrinsics.
%ptr = alloca i32 ; yields ptr%ptr = alloca i32, i32 4 ; yields ptr%ptr = alloca i32, i32 4, align 1024 ; yields ptr%ptr = alloca i32, align 1024 ; yields ptrload’ Instruction¶<result> = load [volatile] <ty>, ptr <pointer>[, align <alignment>][, !nontemporal !<nontemp_node>][, !invariant.load !<empty_node>][, !invariant.group !<empty_node>][, !nonnull !<empty_node>][, !dereferenceable !<deref_bytes_node>][, !dereferenceable_or_null !<deref_bytes_node>][, !align !<align_node>][, !noundef !<empty_node>]<result> = load atomic [volatile] <ty>, ptr <pointer> [syncscope("<target-scope>")] <ordering>, align <alignment> [, !invariant.group !<empty_node>]!<nontemp_node> = !{ i32 1 }!<empty_node> = !{}!<deref_bytes_node> = !{ i64 <dereferenceable_bytes> }!<align_node> = !{ i64 <value_alignment> }The ‘load’ instruction is used to read from memory.
The argument to the load instruction specifies the memory address from whichto load. The type specified must be a first class type ofknown size (i.e. not containing an opaque structural type). Ifthe load is marked as volatile, then the optimizer is not allowed tomodify the number or order of execution of this load with othervolatile operations.
If the load is marked as atomic, it takes an extra ordering and optional syncscope("<target-scope>") argument. Therelease and acq_rel orderings are not valid on load instructions.Atomic loads produce defined results when they may seemultiple atomic stores. The type of the pointee must be an integer, pointer, orfloating-point type whose bit width is a power of two greater than or equal toeight and less than or equal to a target-specific size limit. align must beexplicitly specified on atomic loads, and the load has undefined behavior if thealignment is not set to a value which is at least the size in bytes of thepointee. !nontemporal does not have any defined semantics for atomic loads.
The optional constant align argument specifies the alignment of theoperation (that is, the alignment of the memory address). A value of 0or an omitted align argument means that the operation has the ABIalignment for the target. It is the responsibility of the code emitterto ensure that the alignment information is correct. Overestimating thealignment results in undefined behavior. Underestimating the alignmentmay produce less efficient code. An alignment of 1 is always safe. Themaximum possible alignment is 1 << 32. An alignment value higherthan the size of the loaded type implies memory up to the alignmentvalue bytes can be safely loaded without trapping in the defaultaddress space. Access of the high bytes can interfere with debuggingtools, so should not be accessed if the function has thesanitize_thread or sanitize_address attributes.
The optional !nontemporal metadata must reference a singlemetadata name <nontemp_node> corresponding to a metadata node with onei32 entry of value 1. The existence of the !nontemporalmetadata on the instruction tells the optimizer and code generatorthat this load is not expected to be reused in the cache. The codegenerator may select special instructions to save cache bandwidth, suchas the MOVNT instruction on x86.
The optional !invariant.load metadata must reference a singlemetadata name <empty_node> corresponding to a metadata node with noentries. If a load instruction tagged with the !invariant.loadmetadata is executed, the memory location referenced by the load hasto contain the same value at all points in the program where thememory location is dereferenceable; otherwise, the behavior isundefined.
!invariant.group metadata must reference a single metadata name<empty_node> corresponding to a metadata node with no entries.See invariant.group metadata invariant.group.The optional !nonnull metadata must reference a singlemetadata name <empty_node> corresponding to a metadata node with noentries. The existence of the !nonnull metadata on theinstruction tells the optimizer that the value loaded is known tonever be null. If the value is null at runtime, the behavior is undefined.This is analogous to the nonnull attribute on parameters and returnvalues. This metadata can only be applied to loads of a pointer type.
The optional !dereferenceable metadata must reference a single metadataname <deref_bytes_node> corresponding to a metadata node with one i64entry.See dereferenceable metadata dereferenceable.
The optional !dereferenceable_or_null metadata must reference a singlemetadata name <deref_bytes_node> corresponding to a metadata node with onei64 entry.See dereferenceable_or_null metadata dereferenceable_or_null.
The optional !align metadata must reference a single metadata name<align_node> corresponding to a metadata node with one i64 entry.The existence of the !align metadata on the instruction tells theoptimizer that the value loaded is known to be aligned to a boundary specifiedby the integer value in the metadata node. The alignment must be a power of 2.This is analogous to the ‘’align’’ attribute on parameters and return values.This metadata can only be applied to loads of a pointer type. If the returnedvalue is not appropriately aligned at runtime, the behavior is undefined.
The optional !noundef metadata must reference a single metadata name<empty_node> corresponding to a node with no entries. The existence of!noundef metadata on the instruction tells the optimizer that the valueloaded is known to be well defined.If the value isn’t well defined, the behavior is undefined.
The location of memory pointed to is loaded. If the value being loadedis of scalar type then the number of bytes read does not exceed theminimum number of bytes needed to hold all bits of the type. Forexample, loading an i24 reads at most three bytes. When loading avalue of a type like i20 with a size that is not an integral numberof bytes, the result is undefined if the value was not originallywritten using a store of the same type.If the value being loaded is of aggregate type, the bytes that correspond topadding may be accessed but are ignored, because it is impossible to observepadding from the loaded aggregate value.If <pointer> is not a well-defined value, the behavior is undefined.
%ptr = alloca i32 ; yields ptrstore i32 3, ptr %ptr ; yields void%val = load i32, ptr %ptr ; yields i32:val = i32 3store’ Instruction¶store [volatile] <ty> <value>, ptr <pointer>[, align <alignment>][, !nontemporal !<nontemp_node>][, !invariant.group !<empty_node>] ; yields voidstore atomic [volatile] <ty> <value>, ptr <pointer> [syncscope("<target-scope>")] <ordering>, align <alignment> [, !invariant.group !<empty_node>] ; yields void!<nontemp_node> = !{ i32 1 }!<empty_node> = !{}The ‘store’ instruction is used to write to memory.
There are two arguments to the store instruction: a value to store and anaddress at which to store it. The type of the <pointer> operand must be apointer to the first class type of the <value>operand. If the store is marked as volatile, then the optimizer is notallowed to modify the number or order of execution of this store with othervolatile operations. Only values of first class types of known size (i.e. not containing an opaquestructural type) can be stored.
If the store is marked as atomic, it takes an extra ordering and optional syncscope("<target-scope>") argument. Theacquire and acq_rel orderings aren’t valid on store instructions.Atomic loads produce defined results when they may seemultiple atomic stores. The type of the pointee must be an integer, pointer, orfloating-point type whose bit width is a power of two greater than or equal toeight and less than or equal to a target-specific size limit. align must beexplicitly specified on atomic stores, and the store has undefined behavior ifthe alignment is not set to a value which is at least the size in bytes of thepointee. !nontemporal does not have any defined semantics for atomic stores.
The optional constant align argument specifies the alignment of theoperation (that is, the alignment of the memory address). A value of 0or an omitted align argument means that the operation has the ABIalignment for the target. It is the responsibility of the code emitterto ensure that the alignment information is correct. Overestimating thealignment results in undefined behavior. Underestimating thealignment may produce less efficient code. An alignment of 1 is alwayssafe. The maximum possible alignment is 1 << 32. An alignmentvalue higher than the size of the stored type implies memory up to thealignment value bytes can be stored to without trapping in the defaultaddress space. Storing to the higher bytes however may result in dataraces if another thread can access the same address. Introducing adata race is not allowed. Storing to the extra bytes is not allowedeven in situations where a data race is known to not exist if thefunction has the sanitize_address attribute.
The optional !nontemporal metadata must reference a single metadata name <nontemp_node> corresponding to a metadata node with one i32 entry of value 1. The existence of the !nontemporal metadata on the instruction tells the optimizer and code generator that this store is not expected to be reused in the cache. The code generator may select special instructions to save cache bandwidth, such as the MOVNT instruction on x86.
The optional !invariant.group metadata must reference asingle metadata name <empty_node>. See invariant.group metadata.
The contents of memory are updated to contain <value> at thelocation specified by the <pointer> operand. If <value> isof scalar type then the number of bytes written does not exceed theminimum number of bytes needed to hold all bits of the type. Forexample, storing an i24 writes at most three bytes. When writing avalue of a type like i20 with a size that is not an integral numberof bytes, it is unspecified what happens to the extra bits that do notbelong to the type, but they will typically be overwritten.If <value> is of aggregate type, padding is filled withundef.If <pointer> is not a well-defined value, the behavior is undefined.
%ptr = alloca i32 ; yields ptrstore i32 3, ptr %ptr ; yields void%val = load i32, ptr %ptr ; yields i32:val = i32 3fence’ Instruction¶fence [syncscope("<target-scope>")] <ordering> ; yields voidThe ‘fence’ instruction is used to introduce happens-before edgesbetween operations.
‘fence’ instructions take an ordering argument whichdefines what synchronizes-with edges they add. They can only be givenacquire, release, acq_rel, and seq_cst orderings.
A fence A which has (at least) release ordering semanticssynchronizes with a fence B with (at least) acquire orderingsemantics if and only if there exist atomic operations X and Y, bothoperating on some atomic object M, such that A is sequenced before X, Xmodifies M (either directly or through some side effect of a sequenceheaded by X), Y is sequenced before B, and Y observes M. This provides ahappens-before dependency between A and B. Rather than an explicitfence, one (but not both) of the atomic operations X or Y mightprovide a release or acquire (resp.) ordering constraint andstill synchronize-with the explicit fence and establish thehappens-before edge.
A fence which has seq_cst ordering, in addition to having bothacquire and release semantics specified above, participates inthe global program order of other seq_cst operations and/or fences.
A fence instruction can also take an optional “syncscope” argument.
fence acquire ; yields voidfence syncscope("singlethread") seq_cst ; yields voidfence syncscope("agent") seq_cst ; yields voidcmpxchg’ Instruction¶cmpxchg [weak] [volatile] ptr <pointer>, <ty> <cmp>, <ty> <new> [syncscope("<target-scope>")] <success ordering> <failure ordering>[, align <alignment>] ; yields { ty, i1 }The ‘cmpxchg’ instruction is used to atomically modify memory. Itloads a value in memory and compares it to a given value. If they areequal, it tries to store a new value into the memory.
There are three arguments to the ‘cmpxchg’ instruction: an address to operate on, a value to compare to the value currently at that address, and a new value to place at that address if the compared values are equal. The type of ‘<cmp>’ must be an integer or pointer type whose bit width is a power of two greater than or equal to eight and less than or equal to a target-specific size limit. ‘<cmp>’ and ‘<new>’ must have the same type, and the type of ‘<pointer>’ must be a pointer to that type. If the cmpxchg is marked as volatile, then the optimizer is not allowed to modify the number or order of execution of this cmpxchg with other volatile operations.
The success and failure ordering arguments specify how this cmpxchg synchronizes with other atomic operations. Both ordering parameters must be at least monotonic; the failure ordering cannot be either release or acq_rel.
A cmpxchg instruction can also take an optional “syncscope” argument.
The instruction can take an optional align attribute.The alignment must be a power of two greater or equal to the size of the<value> type. If unspecified, the alignment is assumed to be equal to thesize of the ‘<value>’ type. Note that this default alignment assumption isdifferent from the alignment used for the load/store instructions when alignisn’t specified.
The pointer passed into cmpxchg must have alignment greater than orequal to the size in memory of the operand.
The contents of memory at the location specified by the ‘<pointer>’ operandis read and compared to ‘<cmp>’; if the values are equal, ‘<new>’ iswritten to the location. The original value at the location is returned,together with a flag indicating success (true) or failure (false).
If the cmpxchg operation is marked as weak then a spurious failure ispermitted: the operation may not write <new> even if the comparisonmatched.
If the cmpxchg operation is strong (the default), the i1 value is 1 if and onlyif the value loaded equals cmp.
A successful cmpxchg is a read-modify-write instruction for the purpose of identifying release sequences. A failed cmpxchg is equivalent to an atomic load with an ordering parameter determined by the second ordering parameter.
entry: %orig = load atomic i32, ptr %ptr unordered, align 4 ; yields i32 br label %looploop: %cmp = phi i32 [ %orig, %entry ], [%value_loaded, %loop] %squared = mul i32 %cmp, %cmp %val_success = cmpxchg ptr %ptr, i32 %cmp, i32 %squared acq_rel monotonic ; yields { i32, i1 } %value_loaded = extractvalue { i32, i1 } %val_success, 0 %success = extractvalue { i32, i1 } %val_success, 1 br i1 %success, label %done, label %loopdone: ...atomicrmw’ Instruction¶atomicrmw [volatile] <operation> ptr <pointer>, <ty> <value> [syncscope("<target-scope>")] <ordering>[, align <alignment>] ; yields tyThe ‘atomicrmw’ instruction is used to atomically modify memory.
There are three arguments to the ‘atomicrmw’ instruction: anoperation to apply, an address whose value to modify, an argument to theoperation. The operation must be one of the following keywords:
For most of these operations, the type of ‘<value>’ must be an integertype whose bit width is a power of two greater than or equal to eightand less than or equal to a target-specific size limit. For xchg, thismay also be a floating point or a pointer type with the same size constraintsas integers. For fadd/fsub/fmax/fmin, this must be a floating point type. Thetype of the ‘<pointer>’ operand must be a pointer to that type. Ifthe atomicrmw is marked as volatile, then the optimizer is notallowed to modify the number or order of execution of thisatomicrmw with other volatile operations.
The instruction can take an optional align attribute.The alignment must be a power of two greater or equal to the size of the<value> type. If unspecified, the alignment is assumed to be equal to thesize of the ‘<value>’ type. Note that this default alignment assumption isdifferent from the alignment used for the load/store instructions when alignisn’t specified.
An atomicrmw instruction can also take an optional “syncscope” argument.
The contents of memory at the location specified by the ‘<pointer>’operand are atomically read, modified, and written back. The originalvalue at the location is returned. The modification is specified by theoperation argument:
*ptr = val*ptr = *ptr + val*ptr = *ptr - val*ptr = *ptr & val*ptr = ~(*ptr & val)*ptr = *ptr | val*ptr = *ptr ^ val*ptr = *ptr > val ? *ptr : val (using a signed comparison)*ptr = *ptr < val ? *ptr : val (using a signed comparison)*ptr = *ptr > val ? *ptr : val (using an unsigned comparison)*ptr = *ptr < val ? *ptr : val (using an unsigned comparison)*ptr = *ptr + val (using floating point arithmetic)*ptr = *ptr - val (using floating point arithmetic)*ptr = maxnum(*ptr, val) (match the llvm.maxnum.*\` intrinsic)*ptr = minnum(*ptr, val) (match the llvm.minnum.*\` intrinsic)%old = atomicrmw add ptr %ptr, i32 1 acquire ; yields i32getelementptr’ Instruction¶<result> = getelementptr <ty>, ptr <ptrval>{, [inrange] <ty> <idx>}*<result> = getelementptr inbounds <ty>, ptr <ptrval>{, [inrange] <ty> <idx>}*<result> = getelementptr <ty>, <N x ptr> <ptrval>, [inrange] <vector index type> <idx>The ‘getelementptr’ instruction is used to get the address of asubelement of an aggregate data structure. It performsaddress calculation only and does not access memory. The instruction can alsobe used to calculate a vector of such addresses.
The first argument is always a type used as the basis for the calculations.The second argument is always a pointer or a vector of pointers, and is thebase address to start from. The remaining arguments are indicesthat indicate which of the elements of the aggregate object are indexed.The interpretation of each index is dependent on the type being indexedinto. The first index always indexes the pointer value given as thesecond argument, the second index indexes a value of the type pointed to(not necessarily the value directly pointed to, since the first indexcan be non-zero), etc. The first type indexed into must be a pointervalue, subsequent types can be arrays, vectors, and structs. Note thatsubsequent types being indexed into can never be pointers, since thatwould require loading the pointer before continuing calculation.
The type of each index argument depends on the type it is indexing into.When indexing into a (optionally packed) structure, only i32 integerconstants are allowed (when using a vector of indices they must allbe the same i32 integer constant). When indexing into an array,pointer or vector, integers of any width are allowed, and they are notrequired to be constant. These integers are treated as signed valueswhere relevant.
For example, let’s consider a C code fragment and how it gets compiledto LLVM:
struct RT { char A; int B[10][20]; char C;};struct ST { int X; double Y; struct RT Z;};int *foo(struct ST *s) { return &s[1].Z.B[5][13];}The LLVM code generated by Clang is:
%struct.RT = type { i8, [10 x [20 x i32]], i8 }%struct.ST = type { i32, double, %struct.RT }define ptr @foo(ptr %s) nounwind uwtable readnone optsize ssp {entry: %arrayidx = getelementptr inbounds %struct.ST, ptr %s, i64 1, i32 2, i32 1, i64 5, i64 13 ret ptr %arrayidx}In the example above, the first index is indexing into the‘%struct.ST*’ type, which is a pointer, yielding a ‘%struct.ST’= ‘{ i32, double, %struct.RT }’ type, a structure. The second indexindexes into the third element of the structure, yielding a‘%struct.RT’ = ‘{ i8 , [10 x [20 x i32]], i8 }’ type, anotherstructure. The third index indexes into the second element of thestructure, yielding a ‘[10 x [20 x i32]]’ type, an array. The twodimensions of the array are subscripted into, yielding an ‘i32’type. The ‘getelementptr’ instruction returns a pointer to thiselement.
Note that it is perfectly legal to index partially through a structure,returning a pointer to an inner element. Because of this, the LLVM codefor the given testcase is equivalent to:
define ptr @foo(ptr %s) { %t1 = getelementptr %struct.ST, ptr %s, i32 1 %t2 = getelementptr %struct.ST, ptr %t1, i32 0, i32 2 %t3 = getelementptr %struct.RT, ptr %t2, i32 0, i32 1 %t4 = getelementptr [10 x [20 x i32]], ptr %t3, i32 0, i32 5 %t5 = getelementptr [20 x i32], ptr %t4, i32 0, i32 13 ret ptr %t5}If the inbounds keyword is present, the result value of thegetelementptr is a poison value if one of thefollowing rules is violated:
nsw).nsw).nuw).inbounds keywordapplies to each of the computations element-wise.These rules are based on the assumption that no allocated object may crossthe unsigned address space boundary, and no allocated object may be largerthan half the pointer index type space.
If the inbounds keyword is not present, the offsets are added to thebase address with silently-wrapping two’s complement arithmetic. If theoffsets have a different width from the pointer, they are sign-extendedor truncated to the width of the pointer. The result value of thegetelementptr may be outside the object pointed to by the basepointer. The result value may not necessarily be used to access memorythough, even if it happens to point into allocated storage. See thePointer Aliasing Rules section for moreinformation.
If the inrange keyword is present before any index, loading from orstoring to any pointer derived from the getelementptr has undefinedbehavior if the load or store would access memory outside of the bounds ofthe element selected by the index marked as inrange. The result of apointer comparison or ptrtoint (including ptrtoint-like operationsinvolving memory) involving a pointer derived from a getelementptr withthe inrange keyword is undefined, with the exception of comparisonsin the case where both operands are in the range of the element selectedby the inrange keyword, inclusive of the address one past the end ofthat element. Note that the inrange keyword is currently only allowedin constant getelementptr expressions.
The getelementptr instruction is often confusing. For some more insight into how it works, see the getelementptr FAQ.
%aptr = getelementptr {i32, [12 x i8]}, ptr %saptr, i64 0, i32 1%vptr = getelementptr {i32, <2 x i8>}, ptr %svptr, i64 0, i32 1, i32 1%eptr = getelementptr [12 x i8], ptr %aptr, i64 0, i32 1%iptr = getelementptr [10 x i32], ptr @arr, i16 0, i16 0The getelementptr returns a vector of pointers, instead of a single address,when one or more of its arguments is a vector. In such cases, all vectorarguments should have the same number of elements, and every scalar argumentwill be effectively broadcast into a vector during address calculation.
; All arguments are vectors:; A[i] = ptrs[i] + offsets[i]*sizeof(i8)%A = getelementptr i8, <4 x ptr> %ptrs, <4 x i64> %offsets; Add the same scalar offset to each pointer of a vector:; A[i] = ptrs[i] + offset*sizeof(i8)%A = getelementptr i8, <4 x ptr> %ptrs, i64 %offset; Add distinct offsets to the same pointer:; A[i] = ptr + offsets[i]*sizeof(i8)%A = getelementptr i8, ptr %ptr, <4 x i64> %offsets; In all cases described above the type of the result is <4 x ptr>The two following instructions are equivalent:
getelementptr %struct.ST, <4 x ptr> %s, <4 x i64> %ind1, <4 x i32> <i32 2, i32 2, i32 2, i32 2>, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> %ind4, <4 x i64> <i64 13, i64 13, i64 13, i64 13>getelementptr %struct.ST, <4 x ptr> %s, <4 x i64> %ind1, i32 2, i32 1, <4 x i32> %ind4, i64 13Let’s look at the C code, where the vector version of getelementptrmakes sense:
// Let's assume that we vectorize the following loop:double *A, *B; int *C;for (int i = 0; i < size; ++i) { A[i] = B[C[i]];}; get pointers for 8 elements from array B%ptrs = getelementptr double, ptr %B, <8 x i32> %C; load 8 elements from array B into A%A = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x ptr> %ptrs, i32 8, <8 x i1> %mask, <8 x double> %passthru)trunc .. to’ Instruction¶<result> = trunc <ty> <value> to <ty2> ; yields ty2The ‘trunc’ instruction truncates its operand to the type ty2.
The ‘trunc’ instruction takes a value to trunc, and a type to truncit to. Both types must be of integer types, or vectorsof the same number of integers. The bit size of the value must belarger than the bit size of the destination type, ty2. Equal sizedtypes are not allowed.
The ‘trunc’ instruction truncates the high order bits in valueand converts the remaining bits to ty2. Since the source size mustbe larger than the destination size, trunc cannot be a no-op cast.It will always truncate bits.
%X = trunc i32 257 to i8 ; yields i8:1%Y = trunc i32 123 to i1 ; yields i1:true%Z = trunc i32 122 to i1 ; yields i1:false%W = trunc <2 x i16> <i16 8, i16 7> to <2 x i8> ; yields <i8 8, i8 7>zext .. to’ Instruction¶<result> = zext <ty> <value> to <ty2> ; yields ty2The ‘zext’ instruction zero extends its operand to type ty2.
The ‘zext’ instruction takes a value to cast, and a type to cast itto. Both types must be of integer types, or vectors ofthe same number of integers. The bit size of the value must besmaller than the bit size of the destination type, ty2.
The zext fills the high order bits of the value with zero bits until it reaches the size of the destination type, ty2.
When zero extending from i1, the result will always be either 0 or 1.
%X = zext i32 257 to i64 ; yields i64:257%Y = zext i1 true to i32 ; yields i32:1%Z = zext <2 x i16> <i16 8, i16 7> to <2 x i32> ; yields <i32 8, i32 7>sext .. to’ Instruction¶<result> = sext <ty> <value> to <ty2> ; yields ty2The ‘sext’ sign extends value to the type ty2.
The ‘sext’ instruction takes a value to cast, and a type to cast itto. Both types must be of integer types, or vectors ofthe same number of integers. The bit size of the value must besmaller than the bit size of the destination type, ty2.
The ‘sext’ instruction performs a sign extension by copying the sign bit (highest order bit) of the value until it reaches the bit size of the type ty2.
When sign extending from i1, the extension always results in -1 or 0.
%X = sext i8 -1 to i16 ; yields i16 :65535%Y = sext i1 true to i32 ; yields i32:-1%Z = sext <2 x i16> <i16 8, i16 7> to <2 x i32> ; yields <i32 8, i32 7>fptrunc .. to’ Instruction¶<result> = fptrunc <ty> <value> to <ty2> ; yields ty2The ‘fptrunc’ instruction truncates value to type ty2.
The ‘fptrunc’ instruction takes a floating-pointvalue to cast and a floating-point type to cast it to.The size of value must be larger than the size of ty2. Thisimplies that fptrunc cannot be used to make a no-op cast.
The ‘fptrunc’ instruction casts a value from a largerfloating-point type to a smaller floating-point type.This instruction is assumed to execute in the default floating-pointenvironment.
%X = fptrunc double 16777217.0 to float ; yields float:16777216.0%Y = fptrunc double 1.0E+300 to half ; yields half:+infinityfpext .. to’ Instruction¶<result> = fpext <ty> <value> to <ty2> ; yields ty2The ‘fpext’ extends a floating-point value to a larger floating-pointvalue.
The ‘fpext’ instruction takes a floating-pointvalue to cast, and a floating-point type to cast itto. The source type must be smaller than the destination type.
The ‘fpext’ instruction extends the value from a smallerfloating-point type to a larger floating-point type. The fpext cannot be used to make ano-op cast because it always changes bits. Use bitcast to make ano-op cast for a floating-point cast.
%X = fpext float 3.125 to double ; yields double:3.125000e+00%Y = fpext double %X to fp128 ; yields fp128:0xL00000000000000004000900000000000fptoui .. to’ Instruction¶<result> = fptoui <ty> <value> to <ty2> ; yields ty2The ‘fptoui’ converts a floating-point value to its unsignedinteger equivalent of type ty2.
The ‘fptoui’ instruction takes a value to cast, which must be a scalar or vector floating-point value, and a type to cast it to ty2, which must be an integer type. If ty is a vector floating-point type, ty2 must be a vector integer type with the same number of elements as ty.
The ‘fptoui’ instruction converts its floating-point operand into the nearest (rounding towards zero)unsigned integer value. If the value cannot fit in ty2, the resultis a poison value.
%X = fptoui double 123.0 to i32 ; yields i32:123%Y = fptoui float 1.0E+300 to i1 ; yields undefined:1%Z = fptoui float 1.04E+17 to i8 ; yields undefined:1fptosi .. to’ Instruction¶<result> = fptosi <ty> <value> to <ty2> ; yields ty2The ‘fptosi’ instruction converts floating-pointvalue to type ty2.
The ‘fptosi’ instruction takes a value to cast, which must be a scalar or vector floating-point value, and a type to cast it to ty2, which must be an integer type. If ty is a vector floating-point type, ty2 must be a vector integer type with the same number of elements as ty.
The ‘fptosi’ instruction converts its floating-point operand into the nearest (rounding towards zero)signed integer value. If the value cannot fit in ty2, the resultis a poison value.
%X = fptosi double -123.0 to i32 ; yields i32:-123%Y = fptosi float 1.0E-247 to i1 ; yields undefined:1%Z = fptosi float 1.04E+17 to i8 ; yields undefined:1uitofp .. to’ Instruction¶<result> = uitofp <ty> <value> to <ty2> ; yields ty2The ‘uitofp’ instruction regards value as an unsigned integerand converts that value to the ty2 type.
The ‘uitofp’ instruction takes a value to cast, which must be a scalar or vector integer value, and a type to cast it to ty2, which must be a floating-point type. If ty is a vector integer type, ty2 must be a vector floating-point type with the same number of elements as ty.
The ‘uitofp’ instruction interprets its operand as an unsignedinteger quantity and converts it to the corresponding floating-pointvalue. If the value cannot be exactly represented, it is rounded usingthe default rounding mode.
%X = uitofp i32 257 to float ; yields float:257.0%Y = uitofp i8 -1 to double ; yields double:255.0sitofp .. to’ Instruction¶<result> = sitofp <ty> <value> to <ty2> ; yields ty2The ‘sitofp’ instruction regards value as a signed integer andconverts that value to the ty2 type.
The ‘sitofp’ instruction takes a value to cast, which must be a scalar or vector integer value, and a type to cast it to ty2, which must be a floating-point type. If ty is a vector integer type, ty2 must be a vector floating-point type with the same number of elements as ty.
The ‘sitofp’ instruction interprets its operand as a signed integerquantity and converts it to the corresponding floating-point value. If thevalue cannot be exactly represented, it is rounded using the default roundingmode.
%X = sitofp i32 257 to float ; yields float:257.0%Y = sitofp i8 -1 to double ; yields double:-1.0ptrtoint .. to’ Instruction¶<result> = ptrtoint <ty> <value> to <ty2> ; yields ty2The ‘ptrtoint’ instruction converts the pointer or a vector ofpointers value to the integer (or vector of integers) type ty2.
The ‘ptrtoint’ instruction takes a value to cast, which must bea value of type pointer or a vector of pointers, and atype to cast it to ty2, which must be an integer ora vector of integers type.
The ‘ptrtoint’ instruction converts value to integer typety2 by interpreting the pointer value as an integer and eithertruncating or zero extending that value to the size of the integer type.If value is smaller than ty2 then a zero extension is done. Ifvalue is larger than ty2 then a truncation is done. If they arethe same size, then nothing is done (no-op cast) other than a typechange.
%X = ptrtoint ptr %P to i8 ; yields truncation on 32-bit architecture%Y = ptrtoint ptr %P to i64 ; yields zero extension on 32-bit architecture%Z = ptrtoint <4 x ptr> %P to <4 x i64>; yields vector zero extension for a vector of addresses on 32-bit architectureinttoptr .. to’ Instruction¶<result> = inttoptr <ty> <value> to <ty2>[, !dereferenceable !<deref_bytes_node>][, !dereferenceable_or_null !<deref_bytes_node>] ; yields ty2The ‘inttoptr’ instruction converts an integer value to apointer type, ty2.
The ‘inttoptr’ instruction takes an integer value to cast, and a type to cast it to, which must be a pointer type.
The optional !dereferenceable metadata must reference a single metadata name <deref_bytes_node> corresponding to a metadata node with one i64 entry. See dereferenceable metadata.
The optional !dereferenceable_or_null metadata must reference a single metadata name <deref_bytes_node> corresponding to a metadata node with one i64 entry. See dereferenceable_or_null metadata.
The ‘inttoptr’ instruction converts value to type ty2 byapplying either a zero extension or a truncation depending on the sizeof the integer value. If value is larger than the size of apointer then a truncation is done. If value is smaller than the sizeof a pointer then a zero extension is done. If they are the same size,nothing is done (no-op cast).
%X = inttoptr i32 255 to ptr ; yields zero extension on 64-bit architecture%Y = inttoptr i32 255 to ptr ; yields no-op on 32-bit architecture%Z = inttoptr i64 0 to ptr ; yields truncation on 32-bit architecture%Z = inttoptr <4 x i32> %G to <4 x ptr>; yields truncation of vector G to four pointersbitcast .. to’ Instruction¶<result> = bitcast <ty> <value> to <ty2> ; yields ty2The ‘bitcast’ instruction converts value to type ty2 withoutchanging any bits.
The ‘bitcast’ instruction takes a value to cast, which must be anon-aggregate first class value, and a type to cast it to, which mustalso be a non-aggregate first class type. Thebit sizes of value and the destination type, ty2, must beidentical. If the source type is a pointer, the destination type mustalso be a pointer of the same size. This instruction supports bitwiseconversion of vectors to integers and to vectors of other types (aslong as they have the same size).
The ‘bitcast’ instruction converts value to type ty2. Itis always a no-op cast because no bits change with thisconversion. The conversion is done as if the value had been storedto memory and read back as type ty2. Pointer (or vector ofpointers) types may only be converted to other pointer (or vector ofpointers) types with the same address space through this instruction.To convert pointers to other types, use the inttoptror ptrtoint instructions first.
There is a caveat for bitcasts involving vector types in relation to endianness. For example bitcast <2 x i8> <value> to i16 puts element zero of the vector in the least significant bits of the i16 for little-endian while element zero ends up in the most significant bits for big-endian.
%X = bitcast i8 255 to i8 ; yields i8 :-1%Y = bitcast i32* %x to i16* ; yields i16*:%x%Z = bitcast <2 x i32> %V to i64; ; yields i64: %V (depends on endianess)%Z = bitcast <2 x i32*> %V to <2 x i64*> ; yields <2 x i64*>addrspacecast .. to’ Instruction¶<result> = addrspacecast <pty> <ptrval> to <pty2> ; yields pty2The ‘addrspacecast’ instruction converts ptrval from pty inaddress space n to type pty2 in address space m.
The ‘addrspacecast’ instruction takes a pointer or vector of pointer value to cast and a pointer type to cast it to, which must have a different address space.
The ‘addrspacecast’ instruction converts the pointer valueptrval to type pty2. It can be a no-op cast or a complexvalue modification, depending on the target and the address spacepair. Pointer conversions within the same address space must beperformed with the bitcast instruction. Note that if the addressspace conversion produces a dereferenceable result then both resultand operand refer to the same memory location. The conversion musthave no side effects, and must not capture the value of the pointer.
If the source is poison, the result is poison.
If the source is not poison, and both source anddestination are integral pointers, and theresult pointer is dereferenceable, the cast is assumed to bereversible (i.e. casting the result back to the original address spaceshould yield the original bit pattern).
%X = addrspacecast ptr %x to ptr addrspace(1)%Y = addrspacecast ptr addrspace(1) %y to ptr addrspace(2)%Z = addrspacecast <4 x ptr> %z to <4 x ptr addrspace(3)>icmp’ Instruction¶<result> = icmp <cond> <ty> <op1>, <op2> ; yields i1 or <N x i1>:resultThe ‘icmp’ instruction returns a boolean value or a vector ofboolean values based on comparison of its two integer, integer vector,pointer, or pointer vector operands.
The ‘icmp’ instruction takes three operands. The first operand isthe condition code indicating the kind of comparison to perform. It isnot a value, just a keyword. The possible condition codes are:
eq: equalne: not equalugt: unsigned greater thanuge: unsigned greater or equalult: unsigned less thanule: unsigned less or equalsgt: signed greater thansge: signed greater or equalslt: signed less thansle: signed less or equalThe remaining two arguments must be integer orpointer or integer vector typed. Theymust also be identical types.
The ‘icmp’ compares op1 and op2 according to the conditioncode given as cond. The comparison performed always yields either ani1 or vector of i1 result, as follows:
eq: yields true if the operands are equal, falseotherwise. No sign interpretation is necessary or performed.ne: yields true if the operands are unequal, falseotherwise. No sign interpretation is necessary or performed.ugt: interprets the operands as unsigned values and yieldstrue if op1 is greater than op2.uge: interprets the operands as unsigned values and yieldstrue if op1 is greater than or equal to op2.ult: interprets the operands as unsigned values and yieldstrue if op1 is less than op2.ule: interprets the operands as unsigned values and yieldstrue if op1 is less than or equal to op2.sgt: interprets the operands as signed values and yields trueif op1 is greater than op2.sge: interprets the operands as signed values and yields trueif op1 is greater than or equal to op2.slt: interprets the operands as signed values and yields trueif op1 is less than op2.sle: interprets the operands as signed values and yields trueif op1 is less than or equal to op2.If the operands are pointer typed, the pointer valuesare compared as if they were integers.
If the operands are integer vectors, then they are compared element by element. The result is an i1 vector with the same number of elements as the values being compared. Otherwise, the result is an i1.
<result> = icmp eq i32 4, 5 ; yields: result=false<result> = icmp ne ptr %X, %X ; yields: result=false<result> = icmp ult i16 4, 5 ; yields: result=true<result> = icmp sgt i16 4, 5 ; yields: result=false<result> = icmp ule i16 -4, 5 ; yields: result=false<result> = icmp sge i16 4, 5 ; yields: result=falsefcmp’ Instruction¶<result> = fcmp [fast-math flags]* <cond> <ty> <op1>, <op2> ; yields i1 or <N x i1>:resultThe ‘fcmp’ instruction returns a boolean value or vector of booleanvalues based on comparison of its operands.
If the operands are floating-point scalars, then the result type is a boolean (i1).
If the operands are floating-point vectors, then the result type is a vector of boolean with the same number of elements as the operands being compared.
The ‘fcmp’ instruction takes three operands. The first operand isthe condition code indicating the kind of comparison to perform. It isnot a value, just a keyword. The possible condition codes are:
false: no comparison, always returns falseoeq: ordered and equalogt: ordered and greater thanoge: ordered and greater than or equalolt: ordered and less thanole: ordered and less than or equalone: ordered and not equalord: ordered (no nans)ueq: unordered or equalugt: unordered or greater thanuge: unordered or greater than or equalult: unordered or less thanule: unordered or less than or equalune: unordered or not equaluno: unordered (either nans)true: no comparison, always returns trueOrdered means that neither operand is a QNAN while unordered meansthat either operand may be a QNAN.
Each of the op1 and op2 operands must be of either a floating-point type or a vector of floating-point type. They must have identical types.
The ‘fcmp’ instruction compares op1 and op2 according to thecondition code given as cond. If the operands are vectors, then thevectors are compared element by element. Each comparison performedalways yields an i1 result, as follows:
false: always yields false, regardless of operands.oeq: yields true if both operands are not a QNAN and op1is equal to op2.ogt: yields true if both operands are not a QNAN and op1is greater than op2.oge: yields true if both operands are not a QNAN and op1is greater than or equal to op2.olt: yields true if both operands are not a QNAN and op1is less than op2.ole: yields true if both operands are not a QNAN and op1is less than or equal to op2.one: yields true if both operands are not a QNAN and op1is not equal to op2.ord: yields true if both operands are not a QNAN.ueq: yields true if either operand is a QNAN or op1 isequal to op2.ugt: yields true if either operand is a QNAN or op1 isgreater than op2.uge: yields true if either operand is a QNAN or op1 isgreater than or equal to op2.ult: yields true if either operand is a QNAN or op1 isless than op2.ule: yields true if either operand is a QNAN or op1 isless than or equal to op2.une: yields true if either operand is a QNAN or op1 isnot equal to op2.uno: yields true if either operand is a QNAN.true: always yields true, regardless of operands.The fcmp instruction can also optionally take any number offast-math flags, which are optimization hints to enableotherwise unsafe floating-point optimizations.
Any set of fast-math flags are legal on an fcmp instruction, but theonly flags that have any effect on its semantics are those that allowassumptions to be made about the values of input arguments; namelynnan, ninf, and reassoc. See Fast-Math Flags for more information.
<result> = fcmp oeq float 4.0, 5.0 ; yields: result=false<result> = fcmp one float 4.0, 5.0 ; yields: result=true<result> = fcmp olt float 4.0, 5.0 ; yields: result=true<result> = fcmp ueq double 1.0, 2.0 ; yields: result=falsephi’ Instruction¶<result> = phi [fast-math-flags] <ty> [ <val0>, <label0>], ...The ‘phi’ instruction is used to implement the φ node in the SSAgraph representing the function.
The type of the incoming values is specified with the first type field.After this, the ‘phi’ instruction takes a list of pairs asarguments, with one pair for each predecessor basic block of the currentblock. Only values of first class type may be used asthe value arguments to the PHI node. Only labels may be used as thelabel arguments.
There must be no non-phi instructions between the start of a basic block and the PHI instructions: i.e. PHI instructions must be first in a basic block.
For the purposes of the SSA form, the use of each incoming value isdeemed to occur on the edge from the corresponding predecessor block tothe current block (but after any definition of an ‘invoke’instruction’s return value on the same edge).
The optional fast-math-flags marker indicates that the phi has oneor more fast-math-flags. These are optimization hintsto enable otherwise unsafe floating-point optimizations. Fast-math-flagsare only valid for phis that return a floating-point scalar or vectortype, or an array (nested to any depth) of floating-point scalar or vectortypes.
At runtime, the ‘phi’ instruction logically takes on the value specified by the pair corresponding to the predecessor basic block that executed just prior to the current block.
Loop: ; Infinite loop that counts from 0 on up... %indvar = phi i32 [ 0, %LoopHeader ], [ %nextindvar, %Loop ] %nextindvar = add i32 %indvar, 1 br label %Loopselect’ Instruction¶<result> = select [fast-math flags] selty <cond>, <ty> <val1>, <ty> <val2> ; yields tyselty is either i1 or {<N x i1>}The ‘select’ instruction is used to choose one value based on acondition, without IR-level branching.
The ‘select’ instruction requires an ‘i1’ value or a vector of ‘i1’values indicating the condition, and two values of the same firstclass type.
fast-math flags marker indicates that the select has one or morefast-math flags. These are optimization hints to enableotherwise unsafe floating-point optimizations. Fast-math flags are only validfor selects that return a floating-point scalar or vector type, or an array(nested to any depth) of floating-point scalar or vector types.If the condition is an i1 and it evaluates to 1, the instruction returnsthe first value argument; otherwise, it returns the second valueargument.
If the condition is a vector of i1, then the value arguments must be vectors of the same size, and the selection is done element by element.
If the condition is an i1 and the value arguments are vectors of the same size, then an entire vector is selected.
%X = select i1 true, i8 17, i8 42 ; yields i8:17freeze’ Instruction¶<result> = freeze ty <val> ; yields ty:resultThe ‘freeze’ instruction takes a single argument.
If the argument is undef or poison, ‘freeze’ returns anarbitrary, but fixed, value of type ‘ty’.Otherwise, this instruction is a no-op and returns the input argument.All uses of a value returned by the same ‘freeze’ instruction areguaranteed to always observe the same value, while different ‘freeze’instructions may yield different values.
While undef and poison pointers can be frozen, the result is anon-dereferenceable pointer. See thePointer Aliasing Rules section for more information.If an aggregate value or vector is frozen, the operand is frozen element-wise.The padding of an aggregate isn’t considered, since it isn’t visiblewithout storing it into memory and loading it with a different type.
%w = i32 undef%x = freeze i32 %w%y = add i32 %w, %w ; undef%z = add i32 %x, %x ; even number because all uses of %x observe ; the same value%x2 = freeze i32 %w%cmp = icmp eq i32 %x, %x2 ; can be true or false; example with vectors%v = <2 x i32> <i32 undef, i32 poison>%a = extractelement <2 x i32> %v, i32 0 ; undef%b = extractelement <2 x i32> %v, i32 1 ; poison%add = add i32 %a, %a ; undef%v.fr = freeze <2 x i32> %v ; element-wise freeze%d = extractelement <2 x i32> %v.fr, i32 0 ; not undef%add.f = add i32 %d, %d ; even number; branching on frozen value%poison = add nsw i1 %k, undef ; poison%c = freeze i1 %poisonbr i1 %c, label %foo, label %bar ; non-deterministic branch to %foo or %barcall’ Instruction¶<result> = [tail | musttail | notail ] call [fast-math flags] [cconv] [ret attrs] [addrspace(<num>)] <ty>|<fnty> <fnptrval>(<function args>) [fn attrs] [ operand bundles ]The ‘call’ instruction represents a simple function call.
This instruction requires several arguments:
The optional tail and musttail markers indicate that the optimizersshould perform tail call optimization. The tail marker is a hint thatcan be ignored. The musttail markermeans that the call must be tail call optimized in order for the program tobe correct. This is true even in the presence of attributes like“disable-tail-calls”. The musttail marker provides these guarantees:
"thunk" attributeand the caller and callee both have varargs, than any unprototypedarguments in register or memory are forwarded to the callee. Similarly,the return value of the callee is returned to the caller’s caller, evenif a void return type is in use.Both markers imply that the callee does not access allocas from the caller.The tail marker additionally implies that the callee does not accessvarargs from the caller. Calls marked musttail must obey the followingadditional rules:
In addition, if the calling convention is not swifttailcc or tailcc:
- All ABI-impacting function attributes, such as sret, byval, inreg, returned, and inalloca, must match.
- The caller and callee prototypes must match. Pointer types of parameters or return types may differ in pointee type, but not in address space.
On the other hand, if the calling convention is swifttailcc or tailcc:
- Only these ABI-impacting attributes are allowed: sret, byval, swiftself, and swiftasync.
- Prototypes are not required to match.
Tail call optimization for calls marked tail is guaranteed to occur if the following conditions are met:
- Caller and callee both have the calling convention fastcc or tailcc.
- The call is in tail position (ret immediately follows call and ret uses value of call or is void).
- Option -tailcallopt is enabled, llvm::GuaranteedTailCallOpt is true, or the calling convention is tailcc.
- Platform-specific constraints are met.
notail marker indicates that the optimizers should not addtail or musttail markers to the call. It is used to prevent tailcall optimization from being performed on the call.fast-math flags marker indicates that the call has one or morefast-math flags, which are optimization hints to enableotherwise unsafe floating-point optimizations. Fast-math flags are only validfor calls that return a floating-point scalar or vector type, or an array(nested to any depth) of floating-point scalar or vector types.zeroext’, ‘signext’, and ‘inreg’ attributesare valid here.ty’: the type of the call instruction itself which is also thetype of the return value. Functions that return no value are markedvoid.fnty’: shall be the signature of the function being called. Theargument types must match the types implied by this signature. Thistype can be omitted if the function is not varargs.fnptrval’: An LLVM value containing a pointer to a function tobe called. In most cases, this is a direct function call, butindirect call’s are just as possible, calling an arbitrary pointerto function value.function args’: argument list whose types match the functionsignature argument types and parameter attributes. All arguments mustbe of first class type. If the function signatureindicates the function accepts a variable number of arguments, theextra arguments can be specified.The ‘call’ instruction is used to cause control flow to transfer toa specified function, with its incoming arguments bound to the specifiedvalues. Upon a ‘ret’ instruction in the called function, controlflow continues with the instruction after the function call, and thereturn value of the function is bound to the result argument.
%retval = call i32 @test(i32 %argc)call i32 (ptr, ...) @printf(ptr %msg, i32 12, i8 42) ; yields i32%X = tail call i32 @foo() ; yields i32%Y = tail call fastcc i32 @foo() ; yields i32call void %foo(i8 signext 97)%struct.A = type { i32, i8 }%r = call %struct.A @foo() ; yields { i32, i8 }%gr = extractvalue %struct.A %r, 0 ; yields i32%gr1 = extractvalue %struct.A %r, 1 ; yields i8%Z = call void @foo() noreturn ; indicates that %foo never returns normally%ZZ = call zeroext i32 @bar() ; Return value is %zero extendedllvm treats calls to some functions with names and arguments that matchthe standard C99 library as being the C99 library functions, and mayperform optimizations or generate code for them under that assumption.This is something we’d like to change in the future to provide bettersupport for freestanding environments and non-C-based languages.
va_arg’ Instruction¶<resultval> = va_arg <va_list*> <arglist>, <argty>The ‘va_arg’ instruction is used to access arguments passed throughthe “variable argument” area of a function call. It is used to implementthe va_arg macro in C.
This instruction takes a va_list* value and the type of the argument. It returns a value of the specified argument type and increments the va_list to point to the next argument. The actual type of va_list is target specific.
The ‘va_arg’ instruction loads an argument of the specified type from the specified va_list and causes the va_list to point to the next argument. For more information, see the variable argument handling Intrinsic Functions.
It is legal for this instruction to be called in a function which does not take a variable number of arguments, for example, the vfprintf function.
va_arg is an LLVM instruction instead of an intrinsic function because it takes a type as an argument.
See the variable argument processing section.
Note that the code generator does not yet fully support va_arg on many targets. Also, it does not currently support va_arg with aggregate types on any target.
‘landingpad’ Instruction¶ <resultval> = landingpad <resultty> <clause>+ <resultval> = landingpad <resultty> cleanup <clause>* <clause> := catch <type> <value> <clause> := filter <array constant type> <array constant> The ‘landingpad’ instruction is used by LLVM’s exception handling system to specify that a basic block is a landing pad — one where the exception lands, and corresponds to the code found in the catch portion of a try/catch sequence. It defines values supplied by the personality function upon re-entry to the function. The resultval has the type resultty.
The optional cleanup flag indicates that the landing pad block is a cleanup.
A clause begins with the clause type — catch or filter — and contains the global variable representing the “type” that may be caught or filtered respectively. Unlike the catch clause, the filter clause takes an array constant as its argument. Use “[0 x ptr] undef” for a filter which cannot throw. The ‘landingpad’ instruction must contain at least one clause or the cleanup flag.
The ‘landingpad’ instruction defines the values which are set by the personality function upon re-entry to the function, and therefore the “result type” of the landingpad instruction. As with calling conventions, how the personality function results are represented in LLVM IR is target specific.
The clauses are applied in order from top to bottom. If two landingpad instructions are merged together through inlining, the clauses from the calling function are appended to the list of clauses. When the call stack is being unwound due to an exception being thrown, the exception is compared against each clause in turn. If it doesn’t match any of the clauses, and the cleanup flag is not set, then unwinding continues further up the call stack.
The landingpad instruction has several restrictions:
‘invoke’ instruction. ‘landingpad’ instruction as its first non-PHI instruction. ‘landingpad’ instruction within the landing pad block. ‘landingpad’ instruction. ;; A landing pad which can catch an integer. %res = landingpad { ptr, i32 } catch ptr @_ZTIi ;; A landing pad that is a cleanup. %res = landingpad { ptr, i32 } cleanup ;; A landing pad which can catch an integer and can only throw a double. %res = landingpad { ptr, i32 } catch ptr @_ZTIi filter [1 x ptr] [ptr @_ZTId] ‘catchpad’ Instruction¶ <resultval> = catchpad within <catchswitch> [<args>*] The ‘catchpad’ instruction is used by LLVM’s exception handling system to specify that a basic block begins a catch handler — one where a personality routine attempts to transfer control to catch an exception.
The catchswitch operand must always be a token produced by a catchswitch instruction in a predecessor block. This ensures that each catchpad has exactly one predecessor block, and it always terminates in a catchswitch.
The args correspond to whatever information the personality routine requires to know if this is an appropriate handler for the exception. Control will transfer to the catchpad if this is the first appropriate handler for the exception.
The resultval has the type token and is used to match the catchpad to corresponding catchrets and other nested EH pads.
When the call stack is being unwound due to an exception being thrown, the exception is compared against the args. If it doesn’t match, control will not reach the catchpad instruction. The representation of args is entirely target and personality function-specific.
Like the landingpad instruction, the catchpad instruction must be the first non-phi of its parent basic block.
The meaning of the tokens produced and consumed by catchpad and other “pad” instructions is described in the Windows exception handling documentation.
When a catchpad has been “entered” but not yet “exited” (as described in the EH documentation), it is undefined behavior to execute a call or invoke that does not carry an appropriate “funclet” bundle.
dispatch: %cs = catchswitch within none [label %handler0] unwind to caller ;; A catch block which can catch an integer. handler0: %tok = catchpad within %cs [ptr @_ZTIi] ‘cleanuppad’ Instruction¶ <resultval> = cleanuppad within <parent> [<args>*] The ‘cleanuppad’ instruction is used by LLVM’s exception handling system to specify that a basic block is a cleanup block — one where a personality routine attempts to transfer control to run cleanup actions. The args correspond to whatever additional information the personality function requires to execute the cleanup. The resultval has the type token and is used to match the cleanuppad to corresponding cleanuprets. The parent argument is the token of the funclet that contains the cleanuppad instruction. If the cleanuppad is not inside a funclet, this operand may be the token none.
The instruction takes a list of arbitrary values which are interpreted by the personality function.
When the call stack is being unwound due to an exception being thrown, the personality function transfers control to the cleanuppad with the aid of the personality-specific arguments. As with calling conventions, how the personality function results are represented in LLVM IR is target specific.
The cleanuppad instruction has several restrictions:
‘cleanuppad’ instruction as its first non-PHI instruction. ‘cleanuppad’ instruction within the cleanup block. ‘cleanuppad’ instruction. When a cleanuppad has been “entered” but not yet “exited” (as described in the EH documentation), it is undefined behavior to execute a call or invoke that does not carry an appropriate “funclet” bundle.
%tok = cleanuppad within %cs []