Skip to content

Commit

Permalink
encode,encode2: Support AVX-512 encoding
Browse files Browse the repository at this point in the history
  • Loading branch information
aengelke committed Jul 7, 2024
1 parent a16367e commit 0d67083
Show file tree
Hide file tree
Showing 11 changed files with 901 additions and 91 deletions.
13 changes: 7 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -90,25 +90,26 @@ failed |= fe_enc64(&cur, FE_RET);
The API consists of one function to handle encode requests, as well as some macros. More information can be found in [fadec-enc.h](fadec-enc.h). Usage of internals like enum values is not recommended.

- `int fe_enc64(uint8_t** buf, uint64_t mnem, int64_t operands...)`
- Encodes an instruction for x86-64 into `*buf`.
- Encodes an instruction for x86-64 into `*buf`. EVEX-encoded instructions will transparently encode with the shorter VEX prefix where permitted.
- Return value: `0` on success, a negative value in error cases.
- `buf`: Pointer to the pointer to the instruction buffer. The pointer (`*buf`) will be advanced by the number of bytes written. The instruction buffer must have at least 15 bytes left.
- `mnem`: Instruction mnemonic to encode combined with extra flags:
- `FE_SEG(segreg)`: override segment to specified segment register.
- `FE_ADDR32`: override address size to 32-bit.
- `FE_JMPL`: use longest possible offset encoding, useful when jump target is not known.
- `FE_MASK(maskreg)`: specify non-zero mask register (1--7) for instructions that support masking (suffixed with `_mask` or `_maskz`) or require a mask (AVX-512 gather/scatter).
- `FE_RC_RN/RD/RU/RZ`: set rounding mode for instructions with static rounding control (suffixed `_er`).
- `operands...`: Up to 4 instruction operands. The operand kinds must match the requirements of the mnemonic.
- For register operands, use the register: `FE_AX`, `FE_AH`, `FE_XMM12`.
- For immediate operands, use the constant: `12`, `-0xbeef`.
- For memory operands, use: `FE_MEM(basereg,scale,indexreg,offset)`. Use `0` to specify _no register_. For RIP-relative addressing, the size of the instruction is added automatically.
- For offset operands, specify the target address.
- For register operands (`r`=non-mask register, `k`=mask register), use the register: `FE_AX`, `FE_AH`, `FE_XMM12`.
- For immediate operands (`i`=regular, `a`=absolute address), use the constant: `12`, `-0xbeef`.
- For memory operands (`m`=regular or `b`=broadcast), use: `FE_MEM(basereg,scale,indexreg,offset)`. Use `0` to specify _no register_. For RIP-relative addressing, the size of the instruction is added automatically.
- For offset operands (`o`), specify the target address.

## Known issues
- Decoder/Encoder: register uniqueness constraints are not enforced. This affects:
- VSIB-encoded instructions: no vector register may be used more than once
- AMX instructions: no tile register may be used more than once
- AVX-512 complex FP16 multiplication: destination must not be equal to a source register
- Encoder: AVX-512 not supported (yet).
- Prefixes for indirect jumps and calls are not properly decoded, e.g. `notrack`, `bnd`.
- Low test coverage. (Help needed.)
- No Python API.
Expand Down
1 change: 1 addition & 0 deletions encode-test.c
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ main(int argc, char** argv)
// VSIB encoding doesn't differ for this API
#define FE_MEMV FE_MEM
#define FE_PTR(off) ((intptr_t) buf + (off))
// Combine mnemonic flags with a mask-register selector. Only the low three
// bits of `mask` are passed to FE_MASK. Both arguments are parenthesized so
// that expressions such as `a | b` are masked/combined as a whole.
#define FLAGMASK(flags, mask) ((flags) | FE_MASK((mask) & 7))
#include "encode-test.inc"

puts(failed ? "Some tests FAILED" : "All tests PASSED");
Expand Down
511 changes: 511 additions & 0 deletions encode-test.inc

Large diffs are not rendered by default.

87 changes: 68 additions & 19 deletions encode.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,24 +30,32 @@
#define OPC_67 FE_ADDR32
#define OPC_SEG_MSK 0xe0000000
#define OPC_JMPL FE_JMPL
#define OPC_MASK_MSK 0x1e00000000
#define OPC_MASK_MSK 0xe00000000
#define OPC_EVEXZ 0x1000000000
#define OPC_USER_MSK (OPC_67|OPC_SEG_MSK|OPC_MASK_MSK)
#define OPC_FORCE_SIB 0x2000000000
#define OPC_DOWNGRADE_VEX 0x4000000000
#define OPC_DOWNGRADE_VEX_FLIPW 0x40000000000
#define OPC_EVEX_DISP8SCALE 0x38000000000
#define OPC_GPH_OP0 0x200000000000
#define OPC_GPH_OP1 0x400000000000

#define EPFX_REX_MSK 0x0f
#define EPFX_REX 0x08
#define EPFX_REXR 0x04
#define EPFX_REXX 0x02
#define EPFX_REXB 0x01
#define EPFX_VVVV_IDX 4
#define EPFX_REX_MSK 0x43f
#define EPFX_REX 0x20
#define EPFX_EVEX 0x40
#define EPFX_REXR 0x10
#define EPFX_REXX 0x08
#define EPFX_REXB 0x04
#define EPFX_REXR4 0x02
#define EPFX_REXB4 0x01
#define EPFX_REXX4 0x400
#define EPFX_VVVV_IDX 11

// Operand classification and field-extraction helpers for FeOp values.
// NOTE(review): this listing is a diff view; op_reg_gpl and op_reg_xmm each
// appear twice, presumably the pre-change line followed by its replacement --
// confirm against the actual file before relying on either variant.

// True for negative operand values, which are treated as memory operands.
static bool op_mem(FeOp op) { return op < 0; }
// True for non-negative operand values, which are treated as register operands.
static bool op_reg(FeOp op) { return op >= 0; }
// True when op, ignoring its low 4 bits, equals 0x100 (i.e. 0x100..0x10f).
// Presumably the general-purpose register class -- TODO confirm.
static bool op_reg_gpl(FeOp op) { return (op & ~0xf) == 0x100; }
// Same check, but ignoring the low 5 bits (i.e. 0x100..0x11f).
static bool op_reg_gpl(FeOp op) { return (op & ~0x1f) == 0x100; }
// True when op, ignoring its low 2 bits, equals 0x204 (i.e. 0x204..0x207).
// Presumably the high-byte registers such as AH -- TODO confirm.
static bool op_reg_gph(FeOp op) { return (op & ~0x3) == 0x204; }
// True when op, ignoring its low 4 bits, equals 0x600 (i.e. 0x600..0x60f).
static bool op_reg_xmm(FeOp op) { return (op & ~0xf) == 0x600; }
// Same check, but ignoring the low 5 bits (i.e. 0x600..0x61f).
static bool op_reg_xmm(FeOp op) { return (op & ~0x1f) == 0x600; }
// Displacement of a memory operand: the low 32 bits of op, sign-extended.
static int64_t op_mem_offset(FeOp op) { return (int32_t) op; }
// Base-register field of a memory operand: the 12 bits starting at bit 32.
static unsigned op_mem_base(FeOp op) { return (op >> 32) & 0xfff; }
// Index-register field of a memory operand: the 12 bits starting at bit 44.
static unsigned op_mem_idx(FeOp op) { return (op >> 44) & 0xfff; }
Expand Down Expand Up @@ -97,13 +105,29 @@ enc_opc(uint8_t** restrict buf, uint64_t opc, uint64_t epfx)
*(*buf)++ = (0x65643e362e2600 >> (8 * ((opc >> 29) & 7))) & 0xff;
if (opc & OPC_67) *(*buf)++ = 0x67;
if (opc & OPC_EVEXL0) {
return -1;
*(*buf)++ = 0x62;
unsigned b1 = opc >> 16 & 7;
if (!(epfx & EPFX_REXR)) b1 |= 0x80;
if (!(epfx & EPFX_REXX)) b1 |= 0x40;
if (!(epfx & EPFX_REXB)) b1 |= 0x20;
if (!(epfx & EPFX_REXR4)) b1 |= 0x10;
if ((epfx & EPFX_REXB4)) b1 |= 0x08;
*(*buf)++ = b1;
unsigned b2 = opc >> 20 & 3;
if (!(epfx & EPFX_REXX4)) b2 |= 0x04;
b2 |= (~(epfx >> EPFX_VVVV_IDX) & 0xf) << 3;
if (opc & OPC_REXW) b2 |= 0x80;
*(*buf)++ = b2;
unsigned b3 = opc >> 33 & 7;
b3 |= (~(epfx >> EPFX_VVVV_IDX) & 0x10) >> 1;
if (opc & OPC_EVEXB) b3 |= 0x10;
b3 |= (opc >> 23 & 3) << 5;
if (opc & OPC_EVEXZ) b3 |= 0x80;
*(*buf)++ = b3;
} else if (opc & OPC_VEXL0) {
if (epfx & (EPFX_REXR4|EPFX_REXX4|EPFX_REXB4|(0x10<<EPFX_VVVV_IDX))) return -1;
bool vex3 = opc & (OPC_REXW|0x20000) || epfx & (EPFX_REXX|EPFX_REXB);
unsigned pp = 0;
if (opc & OPC_66) pp = 1;
if (opc & OPC_F3) pp = 2;
if (opc & OPC_F2) pp = 3;
unsigned pp = opc >> 20 & 3;
*(*buf)++ = 0xc4 | !vex3;
unsigned b2 = pp | (opc & 0x800000 ? 0x4 : 0);
if (vex3) {
Expand Down Expand Up @@ -171,13 +195,28 @@ enc_mr(uint8_t** restrict buf, uint64_t opc, uint64_t epfx, uint64_t op0,
{
// If !op_reg(op1), it is a constant value for ModRM.reg
if (op_reg(op0) && (op_reg_idx(op0) & 0x8)) epfx |= EPFX_REXB;
if (op_reg(op0) && (op_reg_idx(op0) & 0x10))
epfx |= 0 ? EPFX_REXB4 : EPFX_REXX|EPFX_EVEX;
if (op_mem(op0) && (op_mem_base(op0) & 0x8)) epfx |= EPFX_REXB;
if (op_mem(op0) && (op_mem_base(op0) & 0x10)) epfx |= EPFX_REXB4;
if (op_mem(op0) && (op_mem_idx(op0) & 0x8)) epfx |= EPFX_REXX;
if (op_reg(op1) && op_reg_idx(op1) & 0x8) epfx |= EPFX_REXR;
if (op_mem(op0) && (op_mem_idx(op0) & 0x10))
epfx |= opc & OPC_VSIB ? 0x10<<EPFX_VVVV_IDX : EPFX_REXX4;
if (op_reg(op1) && (op_reg_idx(op1) & 0x8)) epfx |= EPFX_REXR;
if (op_reg(op1) && (op_reg_idx(op1) & 0x10)) epfx |= EPFX_REXR4;

bool has_rex = opc & OPC_REXW || epfx & EPFX_REX_MSK;
bool has_rex = opc & (OPC_REXW|OPC_VEXL0|OPC_EVEXL0) || (epfx & EPFX_REX_MSK);
if (has_rex && (op_reg_gph(op0) || op_reg_gph(op1))) return -1;

if (epfx & (EPFX_EVEX|EPFX_REXB4|EPFX_REXX4|EPFX_REXR4|(0x10<<EPFX_VVVV_IDX))) {
if (!(opc & OPC_EVEXL0)) return -1;
} else if (opc & OPC_DOWNGRADE_VEX) { // downgrade EVEX to VEX
// clear EVEX and disp8scale, set VEX
opc = (opc & ~(uint64_t) (OPC_EVEXL0|OPC_EVEX_DISP8SCALE)) | OPC_VEXL0;
if (opc & OPC_DOWNGRADE_VEX_FLIPW)
opc ^= OPC_REXW;
}

if (LIKELY(op_reg(op0))) {
if (enc_opc(buf, opc, epfx)) return -1;
*(*buf)++ = 0xc0 | ((op_reg_idx(op1) & 7) << 3) | (op_reg_idx(op0) & 7);
Expand All @@ -198,6 +237,8 @@ enc_mr(uint8_t** restrict buf, uint64_t opc, uint64_t epfx, uint64_t op0,
if (opc & OPC_VSIB)
{
if (!op_reg_xmm(op_mem_idx(op0))) return -1;
// EVEX VSIB requires non-zero opmask
if ((opc & OPC_EVEXL0) && !(opc & OPC_MASK_MSK)) return -1;
}
else
{
Expand Down Expand Up @@ -235,8 +276,15 @@ enc_mr(uint8_t** restrict buf, uint64_t opc, uint64_t epfx, uint64_t op0,
rm = 4;
}
if (off) {
mod = op_imm_n(off, 1) ? 0x40 : 0x80;
dispsz = op_imm_n(off, 1) ? 1 : 4;
unsigned disp8scale = (opc & OPC_EVEX_DISP8SCALE) >> 39;
if (!(off & ((1 << disp8scale) - 1)) && op_imm_n(off >> disp8scale, 1)) {
mod = 0x40;
dispsz = 1;
off >>= disp8scale;
} else {
mod = 0x80;
dispsz = 4;
}
} else if (rm == 5) {
mod = 0x40;
dispsz = 1;
Expand Down Expand Up @@ -370,7 +418,8 @@ try_encode:;
FeOp modreg = ei->modreg ? ops[ei->modreg^3] : (opc & 0xff00) >> 8;
if (ei->vexreg)
epfx |= ((uint64_t) op_reg_idx(ops[ei->vexreg^3])) << EPFX_VVVV_IDX;
if (enc_mr(buf, opc, epfx, ops[ei->modrm^3], modreg, immsz)) goto fail;
// Can fail for upgrade to EVEX due to high register numbers
if (enc_mr(buf, opc, epfx, ops[ei->modrm^3], modreg, immsz)) goto next;
} else if (ei->modreg) {
if (enc_o(buf, opc, epfx, ops[ei->modreg^3])) goto fail;
} else {
Expand Down
1 change: 1 addition & 0 deletions encode2-test.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ main(void) {
#define ENC_TEST_TYPESAFE
// Silence -Warray-bounds with double cast
#define FE_PTR(off) (const void*) ((uintptr_t) buf + (off))
#define FLAGMASK(flags, mask) flags, mask
#include "encode-test.inc"

TEST("\x90", NOP, 0);
Expand Down
1 change: 1 addition & 0 deletions encode2-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ int main() {
#define ENC_TEST_TYPESAFE
// Silence -Warray-bounds with double cast
#define FE_PTR(off) (const void*) ((uintptr_t) buf.data() + (off))
#define FLAGMASK(flags, mask) flags, mask
#include "encode-test.inc"

std::puts(failed ? "Some tests FAILED" : "All tests PASSED");
Expand Down
Loading

0 comments on commit 0d67083

Please sign in to comment.