Skip to content

Commit

Permalink
Relax a GOT load into a PC-relative address materialization
Browse files Browse the repository at this point in the history
  • Loading branch information
rui314 committed Sep 20, 2023
1 parent 9e09e74 commit 1a3ff8e
Show file tree
Hide file tree
Showing 2 changed files with 137 additions and 2 deletions.
86 changes: 84 additions & 2 deletions elf/arch-riscv.cc
Original file line number Diff line number Diff line change
Expand Up @@ -362,9 +362,67 @@ void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {
}
break;
}
case R_RISCV_GOT_HI20:
write_utype(loc, G + GOT + A - P);
case R_RISCV_GOT_HI20: {
  // This relocation usually refers to an AUIPC + LD instruction
  // pair to load a symbol value from the GOT. If the symbol value
  // is actually a link-time constant, we can materialize the value
  // directly into a register to eliminate a memory load.
  //
  // `removed_bytes` tells us which replacement sequence to emit for
  // this relocation. Every rewritten form advances `i` by 3 so that
  // the three relocations following this one are not processed again.
  i64 rd = get_rd(rel.r_offset);

  switch (removed_bytes) {
  case 6:
    // c.li <rd>, val
    *(ul16 *)loc = 0b010'0'00000'00000'01 | (rd << 7);
    write_citype(loc, sym.get_addr(ctx));
    i += 3;
    break;
  case 4:
    // addi <rd>, zero, val
    *(ul32 *)loc = 0b0010011 | (rd << 7);
    write_itype(loc, sym.get_addr(ctx));
    i += 3;
    break;
  case 2: {
    // c.lui <rd>, %hi20(val)
    u64 val = sym.get_addr(ctx);
    *(ul16 *)loc = 0b011'0'00000'00000'01 | (rd << 7);
    write_citype(loc, (val + 0x800) >> 12);

    // addi <rd>, <rd>, %lo12(val)
    *(ul32 *)(loc + 2) = 0b0010011 | (rd << 15) | (rd << 7);
    write_itype(loc + 2, val);
    i += 3;
    break;
  }
  case 0:
    // Nothing was removed from this location. We can still avoid the
    // memory load if the symbol is a PC-relative link-time constant
    // and the relocation sequence is exactly
    //   GOT_HI20, RELAX, PCREL_LO12_I (4 bytes later), RELAX.
    //
    // The back-reference to this instruction is carried by the
    // PCREL_LO12_I relocation, i.e. rels[i + 2], not by the RELAX
    // marker at rels[i + 1]. This matches the equivalent check made
    // for the same relocation in shrink_section().
    if (ctx.arg.relax &&
        sym.is_pcrel_linktime_const(ctx) &&
        i + 3 < rels.size() &&
        rels[i + 1].r_type == R_RISCV_RELAX &&
        rels[i + 2].r_type == R_RISCV_PCREL_LO12_I &&
        rels[i + 2].r_offset == rels[i].r_offset + 4 &&
        file.symbols[rels[i + 2].r_sym]->value == r_offset &&
        rels[i + 3].r_type == R_RISCV_RELAX) {
      i64 val = S + A - P;

      // Only rewrite if the displacement fits in 32 bits.
      if ((i32)val == val) {
        // auipc <rd>, %hi20(val)
        write_utype(loc, val);

        // addi <rd>, <rd>, %lo12(val)
        *(ul32 *)(loc + 4) = 0b0010011 | (rd << 15) | (rd << 7);
        write_itype(loc + 4, val);
        i += 3;
        break;
      }
    }

    // Not relaxable: keep the GOT load and just resolve the AUIPC.
    write_utype(loc, G + GOT + A - P);
    break;
  default:
    unreachable();
  }
  break;
}
case R_RISCV_TLS_GOT_HI20:
write_utype(loc, sym.get_gottp_addr(ctx) + A - P);
break;
Expand Down Expand Up @@ -932,6 +990,30 @@ static void shrink_section(Context<E> &ctx, InputSection<E> &isec, bool use_rvc)
}
break;
}
case R_RISCV_GOT_HI20: {
// Decide how many bytes to drop for a GOT_HI20 relocation that refers to
// an absolute symbol (per the comments below, an AUIPC + LD pair).
//
// The guard accepts only this exact relocation sequence:
//   rels[i]     GOT_HI20
//   rels[i + 1] RELAX
//   rels[i + 2] PCREL_LO12_I, located 4 bytes after rels[i], whose symbol's
//               value equals rels[i].r_offset
//   rels[i + 3] RELAX
// If the sequence does not match, or no branch below applies, `delta` is
// left unchanged.
if (sym.is_absolute() &&
i + 3 < rels.size() &&
rels[i + 1].r_type == R_RISCV_RELAX &&
rels[i + 2].r_type == R_RISCV_PCREL_LO12_I &&
rels[i + 2].r_offset == rels[i].r_offset + 4 &&
isec.file.symbols[rels[i + 2].r_sym]->value == rels[i].r_offset &&
rels[i + 3].r_type == R_RISCV_RELAX) {
u64 val = sym.get_addr(ctx) + r.r_addend;
i64 rd = get_rd(r.r_offset);

// The branches are tried in order of largest saving first.
// NOTE(review): `sign_extend(val, N) == val` presumably tests that val
// fits in an (N + 1)-bit signed immediate -- confirm against the
// definition of sign_extend.
if (use_rvc && rd != 0 && sign_extend(val, 5) == val) {
// Replace AUIPC + LD with C.LI.
delta += 6;
} else if (sign_extend(val, 11) == val) {
// Replace AUIPC + LD with ADDI.
delta += 4;
} else if (use_rvc && rd != 0 && rd != 2 && sign_extend(val, 17) == val) {
// Replace AUIPC + LD with C.LUI + ADDI.
delta += 2;
}
}
break;
}
case R_RISCV_HI20: {
u64 val = sym.get_addr(ctx) + r.r_addend;
i64 rd = get_rd(r.r_offset);
Expand Down
53 changes: 53 additions & 0 deletions test/elf/riscv64_relax-got.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#!/bin/bash
# Checks that linking with and without relaxation yields programs that
# print the same values for symbols loaded with the `la` pseudo-instruction.
# NOTE(review): `skip`, $t, $CC, $QEMU and $MACHINE are presumably provided
# by common.inc -- confirm there.
. $(dirname $0)/common.inc

# This test is only meaningful for RISC-V targets.
[[ $MACHINE = riscv* ]] || skip

# a.o: five accessor functions, each loading one symbol's address into a0
# via `la` and returning.
cat <<EOF | $CC -o $t/a.o -c -xassembler -
.globl get_sym1, get_sym2, get_sym3, get_sym4, get_sym5
get_sym1:
la a0, sym1
ret
get_sym2:
la a0, sym2
ret
get_sym3:
la a0, sym3
ret
get_sym4:
la a0, sym4
ret
get_sym5:
la a0, sym5
ret
EOF

# b.o: defines sym1..sym5 as assembler constants of increasing magnitude
# (0x0, 0xba, 0xbeef, 0x11beef, 0xdeadbeef).
cat <<EOF | $CC -o $t/b.o -c -xassembler -
.globl sym1, sym2, sym3, sym4, sym5
sym1 = 0x0
sym2 = 0xba
sym3 = 0xbeef
sym4 = 0x11beef
sym5 = 0xdeadbeef
EOF

# c.o: driver that prints the value returned by each accessor in hex.
cat <<EOF | $CC -o $t/c.o -c -xc -
#include <stdio.h>
int get_sym1();
int get_sym2();
int get_sym3();
int get_sym4();
int get_sym5();
int main() {
printf("%x %x %x %x %x\n",
get_sym1(), get_sym2(), get_sym3(), get_sym4(), get_sym5());
}
EOF

# Baseline: link with relaxation explicitly disabled.
$CC -B. -o $t/exe1 $t/a.o $t/b.o $t/c.o -Wl,--no-relax
$QEMU $t/exe1 | grep -Eq '^0 ba beef 11beef deadbeef$'

# Same link without --no-relax; the output must be identical.
$CC -B. -o $t/exe2 $t/a.o $t/b.o $t/c.o
$QEMU $t/exe2 | grep -Eq '^0 ba beef 11beef deadbeef$'

0 comments on commit 1a3ff8e

Please sign in to comment.