From 4b91af1232fe7a18b4fca4329d0c916cbfa59dc9 Mon Sep 17 00:00:00 2001 From: yangzhaoxin Date: Fri, 3 Jan 2025 14:29:17 +0800 Subject: [PATCH 1/8] [lld][LoongArch] Implement TLSDESC GD/LD to IE/LE. Support TLSDESC to initial-exec or local-exec optimizations. Introduce a new relocation expression (RelExpr) RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC and reuse the existing R_RELAX_TLS_GD_TO_IE_ABS to support TLSDESC => IE, and reuse the existing R_RELAX_TLS_GD_TO_LE to support TLSDESC => LE. In the normal or medium code model, there are two forms of code sequences: * pcalau12i $a0, %desc_pc_hi20(sym_desc) * addi.d $a0, $a0, %desc_pc_lo12(sym_desc) * ld.d $ra, $a0, %desc_ld(sym_desc) * jirl $ra, $ra, %desc_call(sym_desc) ------ * pcaddi $a0, %desc_pcrel_20(sym_desc) * ld.d $ra, $a0, %desc_ld(sym_desc) * jirl $ra, $ra, %desc_call(sym_desc) The code sequence obtained is as follows: * pcalau12i $a0, %ie_pc_hi20(sym_ie) * ld.[wd] $a0, $a0, %ie_pc_lo12(sym_ie) For simplicity, in both tlsdescToIe and tlsdescToLe we always convert the preceding instructions to NOPs, because both forms of the code sequence (corresponding to the relocation combinations R_LARCH_TLS_DESC_PC_HI20+R_LARCH_TLS_DESC_PC_LO12 and R_LARCH_TLS_DESC_PCREL20_S2) can then be handled in the same way. FIXME: When relaxation is enabled, the redundant NOPs can be removed. This will be implemented in a future patch. Note: TLSDESC code sequences of any form must not appear interleaved in the normal, medium or extreme code model; compilers do not generate such code and lld does not support it. This is guaranteed by the guard in PostRASchedulerList.cpp in llvm. ``` Calls are not scheduling boundaries before register allocation, but post-ra we don't gain anything by scheduling across calls since we don't need to worry about register pressure. 
``` --- lld/ELF/Arch/LoongArch.cpp | 148 ++++++++++++++++++++++++++++++++++++- lld/ELF/InputSection.cpp | 1 + lld/ELF/Relocations.cpp | 38 ++++++---- lld/ELF/Relocations.h | 1 + 4 files changed, 171 insertions(+), 17 deletions(-) diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp index 3c4ad53af1b51..a8787e0caea95 100644 --- a/lld/ELF/Arch/LoongArch.cpp +++ b/lld/ELF/Arch/LoongArch.cpp @@ -39,8 +39,13 @@ class LoongArch final : public TargetInfo { void relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const override; bool relaxOnce(int pass) const override; + RelExpr adjustTlsExpr(RelType type, RelExpr expr) const override; void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override; void finalizeRelax(int passes) const override; + +private: + void tlsdescToIe(uint8_t *loc, const Relocation &rel, uint64_t val) const; + void tlsdescToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const; }; } // end anonymous namespace @@ -58,6 +63,7 @@ enum Op { LU12I_W = 0x14000000, PCADDI = 0x18000000, PCADDU12I = 0x1c000000, + PCALAU12I = 0x1a000000, LD_W = 0x28800000, LD_D = 0x28c00000, JIRL = 0x4c000000, @@ -69,6 +75,7 @@ enum Reg { R_ZERO = 0, R_RA = 1, R_TP = 2, + R_A0 = 4, R_T0 = 12, R_T1 = 13, R_T2 = 14, @@ -961,7 +968,8 @@ static bool relax(Ctx &ctx, InputSection &sec) { case R_LARCH_TLS_LD_PC_HI20: case R_LARCH_TLS_DESC_PC_HI20: // The overflow check for i+2 will be carried out in isPairRelaxable. - if (isPairRelaxable(relocs, i)) + if (r.expr != RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC && + r.expr != R_RELAX_TLS_GD_TO_LE && isPairRelaxable(relocs, i)) relaxPCHi20Lo12(ctx, sec, i, loc, r, relocs[i + 2], remove); break; case R_LARCH_CALL36: @@ -1046,6 +1054,103 @@ static void tlsIeToLe(uint8_t *loc, const Relocation &rel, uint64_t val) { } } +// Convert TLSDESC GD/LD to IE. 
+// In normal or medium code model, there are two forms of code sequences: +// * pcalau12i $a0, %desc_pc_hi20(sym_desc) +// * addi.d $a0, $a0, %desc_pc_lo12(sym_desc) +// * ld.d $ra, $a0, %desc_ld(sym_desc) +// * jirl $ra, $ra, %desc_call(sym_desc) +// ------ +// * pcaddi $a0, %desc_pcrel_20(a) +// * load $ra, $a0, %desc_ld(a) +// * jirl $ra, $ra, %desc_call(a) +// +// The code sequence obtained is as follows: +// * pcalau12i $a0, %ie_pc_hi20(sym_ie) +// * ld.[wd] $a0, $a0, %ie_pc_lo12(sym_ie) +// +// Simplicity, whether tlsdescToIe or tlsdescToLe, we always tend to convert the +// preceding instructions to NOPs, due to both forms of code sequence +// (corresponding to relocation combinations: +// R_LARCH_TLS_DESC_PC_HI20+R_LARCH_TLS_DESC_PC_LO12 and +// R_LARCH_TLS_DESC_PCREL20_S2) have same process. +// +// When relaxation enables, redundant NOPs can be removed. +void LoongArch::tlsdescToIe(uint8_t *loc, const Relocation &rel, + uint64_t val) const { + switch (rel.type) { + case R_LARCH_TLS_DESC_PC_HI20: + case R_LARCH_TLS_DESC_PC_LO12: + case R_LARCH_TLS_DESC_PCREL20_S2: + write32le(loc, insn(ANDI, R_ZERO, R_ZERO, 0)); // nop + break; + case R_LARCH_TLS_DESC_LD: + write32le(loc, insn(PCALAU12I, R_A0, 0, 0)); // pcalau12i $a0, %ie_pc_hi20 + relocateNoSym(loc, R_LARCH_TLS_IE_PC_HI20, val); + break; + case R_LARCH_TLS_DESC_CALL: + write32le(loc, insn(ctx.arg.is64 ? LD_D : LD_W, R_A0, R_A0, + 0)); // ld.[wd] $a0, $a0, %ie_pc_lo12 + relocateNoSym(loc, R_LARCH_TLS_IE_PC_LO12, val); + break; + default: + llvm_unreachable("unsupported relocation for TLSDESC to IE"); + } +} + +// Convert TLSDESC GD/LD to LE. +// The code sequence obtained in the normal or medium code model is as follows: +// * lu12i.w $a0, %le_hi20(sym_le) # le_hi20 != 0 +// * ori $a0 $a0, %le_lo12(sym_le) +// See the comment in tlsdescToIe for detailed information. 
+void LoongArch::tlsdescToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const { + assert(isInt<32>(val) && + "val exceeds the range of medium code model in tlsdescToLe"); + + bool isUInt12 = isUInt<12>(val); + switch (rel.type) { + case R_LARCH_TLS_DESC_PC_HI20: + case R_LARCH_TLS_DESC_PC_LO12: + case R_LARCH_TLS_DESC_PCREL20_S2: + write32le(loc, insn(ANDI, R_ZERO, R_ZERO, 0)); // nop + break; + case R_LARCH_TLS_DESC_LD: + if (isUInt12) + write32le(loc, insn(ANDI, R_ZERO, R_ZERO, 0)); // nop + else + write32le(loc, insn(LU12I_W, R_A0, extractBits(val, 31, 12), + 0)); // lu12i.w $a0, %le_hi20 + break; + case R_LARCH_TLS_DESC_CALL: + if (isUInt12) + write32le(loc, insn(ORI, R_A0, R_ZERO, val)); // ori $a0, $r0, %le_lo12 + else + write32le(loc, + insn(ORI, R_A0, R_A0, lo12(val))); // ori $a0, $a0, %le_lo12 + break; + default: + llvm_unreachable("unsupported relocation for TLSDESC to LE"); + } +} + +// During TLSDESC GD_TO_IE, the converted code sequence always includes an +// instruction related to the Lo12 relocation (ld.[wd]). To obtain correct val +// in `getRelocTargetVA`, expr of this instruction should be adjusted to +// R_RELAX_TLS_GD_TO_IE_ABS, while expr of other instructions related to the +// Hi20 relocation (pcalau12i) should be adjusted to +// RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC. Specifically, in the normal or +// medium code model, the instruction with relocation R_LARCH_TLS_DESC_CALL is +// the candidate of Lo12 relocation. +RelExpr LoongArch::adjustTlsExpr(RelType type, RelExpr expr) const { + if (expr == R_RELAX_TLS_GD_TO_IE) { + if (type != R_LARCH_TLS_DESC_CALL) + return RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC; + return R_RELAX_TLS_GD_TO_IE_ABS; + } + return expr; +} + void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const { const unsigned bits = ctx.arg.is64 ? 
64 : 32; uint64_t secAddr = sec.getOutputSection()->addr; @@ -1088,6 +1193,47 @@ void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const { tlsIeToLe(loc, rel, val); } continue; + case RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC: + if (rel.type == R_LARCH_TLS_DESC_PC_HI20) { + // LoongArch does not support TLSDESC GD/LD to LE/IE optimization in the + // extreme code model. In these cases, the relocs are as follows: + // + // * i -- R_LARCH_TLS_DESC_PC_HI20 + // * i+1 -- R_LARCH_TLS_DESC_PC_LO12 + // * i+2 -- R_LARCH_TLS_DESC64_PC_LO20 + // * i+3 -- R_LARCH_TLS_DESC64_PC_HI12 + isExtreme = + (i + 2 < size && relocs[i + 2].type == R_LARCH_TLS_DESC64_PC_LO20); + } + [[fallthrough]]; + case R_RELAX_TLS_GD_TO_IE_ABS: + if (isExtreme) { + if (rel.type == R_LARCH_TLS_DESC_CALL) + continue; + rel.expr = getRelExpr(rel.type, *rel.sym, loc); + val = SignExtend64(sec.getRelocTargetVA(ctx, rel, secAddr + rel.offset), + bits); + relocateNoSym(loc, rel.type, val); + } else { + tlsdescToIe(loc, rel, val); + } + continue; + case R_RELAX_TLS_GD_TO_LE: + if (rel.type == R_LARCH_TLS_DESC_PC_HI20) { + isExtreme = + (i + 2 < size && relocs[i + 2].type == R_LARCH_TLS_DESC64_PC_LO20); + } + if (isExtreme) { + if (rel.type == R_LARCH_TLS_DESC_CALL) + continue; + rel.expr = getRelExpr(rel.type, *rel.sym, loc); + val = SignExtend64(sec.getRelocTargetVA(ctx, rel, secAddr + rel.offset), + bits); + relocateNoSym(loc, rel.type, val); + } else { + tlsdescToLe(loc, rel, val); + } + continue; default: break; } diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index 52c472bb89caf..28c405dd3f8a8 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -831,6 +831,7 @@ uint64_t InputSectionBase::getRelocTargetVA(Ctx &ctx, const Relocation &r, case R_GOTPLT_PC: return r.sym->getGotPltVA(ctx) + a - p; case RE_LOONGARCH_GOT_PAGE_PC: + case RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC: if (r.sym->hasFlag(NEEDS_TLSGD)) return 
getLoongArchPageDelta(ctx.in.got->getGlobalDynAddr(*r.sym) + a, p, r.type); diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index 277acb26987bc..1203919c6fbd8 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -1346,22 +1346,10 @@ unsigned RelocationScanner::handleTlsRelocation(RelExpr expr, RelType type, if (ctx.arg.emachine == EM_MIPS) return handleMipsTlsRelocation(ctx, type, sym, *sec, offset, addend, expr); - // LoongArch does not yet implement transition from TLSDESC to LE/IE, so - // generate TLSDESC dynamic relocation for the dynamic linker to handle. - if (ctx.arg.emachine == EM_LOONGARCH && - oneof(expr)) { - if (expr != R_TLSDESC_CALL) { - sym.setFlags(NEEDS_TLSDESC); - sec->addReloc({expr, type, offset, addend, &sym}); - } - return 1; - } - bool isRISCV = ctx.arg.emachine == EM_RISCV; if (oneof(expr) && + R_TLSDESC_GOTPLT, RE_LOONGARCH_TLSDESC_PAGE_PC>(expr) && ctx.arg.shared) { // R_RISCV_TLSDESC_{LOAD_LO12,ADD_LO12_I,CALL} reference a label. Do not // set NEEDS_TLSDESC on the label. @@ -1375,10 +1363,14 @@ unsigned RelocationScanner::handleTlsRelocation(RelExpr expr, RelType type, return 1; } - // LoongArch supports IE to LE optimization in non-extreme code model. + // LoongArch supports IE to LE, DESC GD/LD to IE/LE optimizations in + // non-extreme code model. bool execOptimizeInLoongArch = ctx.arg.emachine == EM_LOONGARCH && - (type == R_LARCH_TLS_IE_PC_HI20 || type == R_LARCH_TLS_IE_PC_LO12); + (type == R_LARCH_TLS_IE_PC_HI20 || type == R_LARCH_TLS_IE_PC_LO12 || + type == R_LARCH_TLS_DESC_PC_HI20 || type == R_LARCH_TLS_DESC_PC_LO12 || + type == R_LARCH_TLS_DESC_LD || type == R_LARCH_TLS_DESC_CALL || + type == R_LARCH_TLS_DESC_PCREL20_S2); // ARM, Hexagon, LoongArch and RISC-V do not support GD/LD to IE/LE // optimizations. 
@@ -1437,9 +1429,23 @@ unsigned RelocationScanner::handleTlsRelocation(RelExpr expr, RelType type, return 1; } + // LoongArch does not support transition from TLSDESC to LE/IE in the extreme + // code model, in which NEEDS_TLSDESC should set, rather than NEEDS_TLSGD. So + // we check independently. + if (ctx.arg.emachine == EM_LOONGARCH && + oneof(expr) && + !execOptimize) { + if (expr != R_TLSDESC_CALL) { + sym.setFlags(NEEDS_TLSDESC); + sec->addReloc({expr, type, offset, addend, &sym}); + } + return 1; + } + if (oneof(expr)) { + RE_LOONGARCH_TLSGD_PAGE_PC, RE_LOONGARCH_TLSDESC_PAGE_PC>(expr)) { if (!execOptimize) { sym.setFlags(NEEDS_TLSGD); sec->addReloc({expr, type, offset, addend, &sym}); diff --git a/lld/ELF/Relocations.h b/lld/ELF/Relocations.h index d2a77bc953109..02ddf707fd950 100644 --- a/lld/ELF/Relocations.h +++ b/lld/ELF/Relocations.h @@ -131,6 +131,7 @@ enum RelExpr { RE_LOONGARCH_GOT_PAGE_PC, RE_LOONGARCH_TLSGD_PAGE_PC, RE_LOONGARCH_TLSDESC_PAGE_PC, + RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC, }; // Architecture-neutral representation of relocation. 
From 99c1a4eaaad1e71aab45861aa48da768e8868d0b Mon Sep 17 00:00:00 2001 From: yangzhaoxin Date: Fri, 3 Jan 2025 17:21:07 +0800 Subject: [PATCH 2/8] Modify loongarch-tlsdesc.s and loongarch-tlsdesc-pcrel20-s2.s Add loongarch-relax-tlsdesc.s --- lld/test/ELF/loongarch-relax-tlsdesc.s | 256 +++++++++++--------- lld/test/ELF/loongarch-tlsdesc-pcrel20-s2.s | 59 +++-- lld/test/ELF/loongarch-tlsdesc.s | 65 +++-- 3 files changed, 227 insertions(+), 153 deletions(-) diff --git a/lld/test/ELF/loongarch-relax-tlsdesc.s b/lld/test/ELF/loongarch-relax-tlsdesc.s index 9ce7c5881ca96..5f4368343471c 100644 --- a/lld/test/ELF/loongarch-relax-tlsdesc.s +++ b/lld/test/ELF/loongarch-relax-tlsdesc.s @@ -9,19 +9,18 @@ # RUN: llvm-readobj -r -x .got a.64.so | FileCheck --check-prefix=GD64-RELA %s # RUN: llvm-objdump --no-show-raw-insn -dr -h a.64.so | FileCheck %s --check-prefix=GD64 -## FIXME: The transition from TLSDESC to IE/LE has not yet been implemented. -## Keep the dynamic relocations and hand them over to dynamic linker. - -# RUN: ld.lld --relax -e 0 -z now a.64.o c.64.o -o a.64.le -# RUN: llvm-readobj -r -x .got a.64.le | FileCheck --check-prefix=LE64-RELA %s -# RUN: llvm-objdump --no-show-raw-insn -d -h a.64.le | FileCheck %s --check-prefix=LE64 +## FIXME: IE/LE relaxation have not yet been implemented, --relax/--no-relax obtain the same results. +## Transition from TLSDESC to IE/LE. Also check --emit-relocs. 
+# RUN: ld.lld -e 0 -z now --emit-relocs a.64.o c.64.o -o a.64.le +# RUN: llvm-readobj -r -x .got a.64.le 2>&1 | FileCheck --check-prefix=LE64-RELA %s +# RUN: llvm-objdump --no-show-raw-insn -dr -h a.64.le | FileCheck %s --check-prefix=LE64 # RUN: ld.lld --no-relax -e 0 -z now a.64.o c.64.o -o a.64.le.norelax # RUN: llvm-objdump --no-show-raw-insn -d -h a.64.le.norelax | FileCheck %s --check-prefix=LE64-NORELAX -# RUN: ld.lld --relax -e 0 -z now a.64.o c.64.so -o a.64.ie +# RUN: ld.lld --relax -e 0 -z now --emit-relocs a.64.o c.64.so -o a.64.ie # RUN: llvm-readobj -r -x .got a.64.ie | FileCheck --check-prefix=IE64-RELA %s -# RUN: llvm-objdump --no-show-raw-insn -d -h a.64.ie | FileCheck %s --check-prefix=IE64 +# RUN: llvm-objdump --no-show-raw-insn -dr -h a.64.ie | FileCheck %s --check-prefix=IE64 # RUN: ld.lld --no-relax -e 0 -z now a.64.o c.64.so -o a.64.ie.norelax # RUN: llvm-objdump --no-show-raw-insn -d -h a.64.ie.norelax | FileCheck %s --check-prefix=IE64-NORELAX @@ -71,172 +70,199 @@ # GD64-NEXT: jirl $ra, $ra, 0 # GD64-NEXT: add.d $a4, $a0, $tp -# LE64-RELA: .rela.dyn { -# LE64-RELA-NEXT: 0x30280 R_LARCH_TLS_DESC64 - 0x8 -# LE64-RELA-NEXT: 0x30290 R_LARCH_TLS_DESC64 - 0x800 -# LE64-RELA-NEXT: 0x302A0 R_LARCH_TLS_DESC64 - 0x1000 -# LE64-RELA-NEXT: 0x302B0 R_LARCH_TLS_DESC64 - 0x7FF -# LE64-RELA-NEXT: } -# LE64-RELA: Hex dump of section '.got': -# LE64-RELA-NEXT: 0x00030280 00000000 00000000 00000000 00000000 . -# LE64-RELA-NEXT: 0x00030290 00000000 00000000 00000000 00000000 . -# LE64-RELA-NEXT: 0x000302a0 00000000 00000000 00000000 00000000 . -# LE64-RELA-NEXT: 0x000302b0 00000000 00000000 00000000 00000000 . - -# LE64: .got 00000040 0000000000030280 - -## &.got[a]-. 
= 0x30280 - 0x20228 = 16406<<2 -# LE64: 20228: pcaddi $a0, 16406 -# LE64-NEXT: ld.d $ra, $a0, 0 -# LE64-NEXT: jirl $ra, $ra, 0 +# LE64-RELA: could not find section '.got' + +## a@tprel = 0x8 +# LE64: 20158: nop +# LE64-NEXT: R_LARCH_TLS_DESC_PC_HI20 a +# LE64-NEXT: R_LARCH_RELAX *ABS* +# LE64-NEXT: nop +# LE64-NEXT: R_LARCH_TLS_DESC_PC_LO12 a +# LE64-NEXT: R_LARCH_RELAX *ABS* +# LE64-NEXT: nop +# LE64-NEXT: R_LARCH_TLS_DESC_LD a +# LE64-NEXT: R_LARCH_RELAX *ABS* +# LE64-NEXT: ori $a0, $zero, 8 +# LE64-NEXT: R_LARCH_TLS_DESC_CALL a +# LE64-NEXT: R_LARCH_RELAX *ABS* # LE64-NEXT: add.d $a1, $a0, $tp -## &.got[b]-. = 0x30280+48 - 0x20238: 0x10 pages, page offset 0x2b0 -## R_LARCH_RELAX does not appear in pairs. No relaxation. -# LE64: 20238: pcalau12i $a0, 16 -# LE64-NEXT: addi.d $a0, $a0, 688 -# LE64-NEXT: ld.d $ra, $a0, 0 -# LE64-NEXT: jirl $ra, $ra, 0 +## b@tprel = 0x7ff +# LE64: 2016c: nop +# LE64-NEXT: R_LARCH_TLS_DESC_PC_HI20 b +# LE64-NEXT: R_LARCH_RELAX *ABS* +# LE64-NEXT: nop +# LE64-NEXT: R_LARCH_TLS_DESC_PC_LO12 b +# LE64-NEXT: nop +# LE64-NEXT: R_LARCH_TLS_DESC_LD b +# LE64-NEXT: ori $a0, $zero, 2047 +# LE64-NEXT: R_LARCH_TLS_DESC_CALL b # LE64-NEXT: add.d $a2, $a0, $tp -## &.got[c]-. = 0x30280+16 - 0x2024c: 0x10 pages, page offset 0x290 +## c@tprel = 0x800 ## Without R_LARCH_RELAX relocation. No relaxation. -# LE64: 2024c: pcalau12i $a0, 16 +# LE64: 20180: nop +# LE64-NEXT: R_LARCH_TLS_DESC_PC_HI20 c # LE64-NEXT: addi.d $t0, $zero, 0 -# LE64-NEXT: addi.d $a0, $a0, 656 +# LE64-NEXT: nop +# LE64-NEXT: R_LARCH_TLS_DESC_PC_LO12 c # LE64-NEXT: addi.d $t0, $t0, 1 -# LE64-NEXT: ld.d $ra, $a0, 0 +# LE64-NEXT: nop +# LE64-NEXT: R_LARCH_TLS_DESC_LD c # LE64-NEXT: addi.d $t0, $t0, 1 -# LE64-NEXT: jirl $ra, $ra, 0 +# LE64-NEXT: ori $a0, $zero, 2048 +# LE64-NEXT: R_LARCH_TLS_DESC_CALL c # LE64-NEXT: add.d $a3, $a0, $tp -## &.got[d]-. 
= 0x30280+32 - 0x2026c = 16397<<2 -# LE64: 2026c: pcaddi $a0, 16397 -# LE64-NEXT: ld.d $ra, $a0, 0 -# LE64-NEXT: jirl $ra, $ra, 0 +## d@tprel = 0x1000 +# LE64: 201a0: nop +# LE64-NEXT: R_LARCH_TLS_DESC_PC_HI20 d +# LE64-NEXT: R_LARCH_RELAX *ABS* +# LE64-NEXT: nop +# LE64-NEXT: R_LARCH_TLS_DESC_PC_LO12 d +# LE64-NEXT: R_LARCH_RELAX *ABS* +# LE64-NEXT: lu12i.w $a0, 1 +# LE64-NEXT: R_LARCH_TLS_DESC_LD d +# LE64-NEXT: ori $a0, $a0, 0 +# LE64-NEXT: R_LARCH_TLS_DESC_CALL d # LE64-NEXT: add.d $a4, $a0, $tp -# LE64-NORELAX: .got 00000040 0000000000030288 - -## &.got[a]-. = 0x30288 - 0x20228 = 0x10 pages, page offset 0x288 -# LE64-NORELAX: 20228: pcalau12i $a0, 16 -# LE64-NORELAX-NEXT: addi.d $a0, $a0, 648 -# LE64-NORELAX-NEXT: ld.d $ra, $a0, 0 -# LE64-NORELAX-NEXT: jirl $ra, $ra, 0 +## a@tprel = 0x8 +# LE64-NORELAX: 20158: nop +# LE64-NORELAX-NEXT: nop +# LE64-NORELAX-NEXT: nop +# LE64-NORELAX-NEXT: ori $a0, $zero, 8 # LE64-NORELAX-NEXT: add.d $a1, $a0, $tp -## &.got[b]-. = 0x30288+48 - 0x2023c: 0x10 pages, page offset 0x2b8 -## R_LARCH_RELAX does not appear in pairs. No relaxation. -# LE64-NORELAX: 2023c: pcalau12i $a0, 16 -# LE64-NORELAX-NEXT: addi.d $a0, $a0, 696 -# LE64-NORELAX-NEXT: ld.d $ra, $a0, 0 -# LE64-NORELAX-NEXT: jirl $ra, $ra, 0 +## b@tprel = 0x7ff +# LE64-NORELAX: 2016c: nop +# LE64-NORELAX-NEXT: nop +# LE64-NORELAX-NEXT: nop +# LE64-NORELAX-NEXT: ori $a0, $zero, 2047 # LE64-NORELAX-NEXT: add.d $a2, $a0, $tp -## &.got[c]-. = 0x30288+16 - 0x20250: 0x10 pages, page offset 0x298 +## c@tprel = 0x800 ## Without R_LARCH_RELAX relocation. No relaxation. 
-# LE64-NORELAX: 20250: pcalau12i $a0, 16 +# LE64-NORELAX: 20180: nop # LE64-NORELAX-NEXT: addi.d $t0, $zero, 0 -# LE64-NORELAX-NEXT: addi.d $a0, $a0, 664 +# LE64-NORELAX-NEXT: nop # LE64-NORELAX-NEXT: addi.d $t0, $t0, 1 -# LE64-NORELAX-NEXT: ld.d $ra, $a0, 0 +# LE64-NORELAX-NEXT: nop # LE64-NORELAX-NEXT: addi.d $t0, $t0, 1 -# LE64-NORELAX-NEXT: jirl $ra, $ra, 0 +# LE64-NORELAX-NEXT: ori $a0, $zero, 2048 # LE64-NORELAX-NEXT: add.d $a3, $a0, $tp -## &.got[d]-. = 0x30288+32 - 0x20270: 0x10 pages, page offset 0x2a8 -# LE64-NORELAX: 20270: pcalau12i $a0, 16 -# LE64-NORELAX-NEXT: addi.d $a0, $a0, 680 -# LE64-NORELAX-NEXT: ld.d $ra, $a0, 0 -# LE64-NORELAX-NEXT: jirl $ra, $ra, 0 +## d@tprel = 0x1000 +# LE64-NORELAX: 201a0: nop +# LE64-NORELAX-NEXT: nop +# LE64-NORELAX-NEXT: lu12i.w $a0, 1 +# LE64-NORELAX-NEXT: ori $a0, $a0, 0 # LE64-NORELAX-NEXT: add.d $a4, $a0, $tp # IE64-RELA: .rela.dyn { -# IE64-RELA-NEXT: 0x30430 R_LARCH_TLS_DESC64 - 0x8 -# IE64-RELA-NEXT: 0x30460 R_LARCH_TLS_DESC64 - 0x7FF -# IE64-RELA-NEXT: 0x30440 R_LARCH_TLS_DESC64 c 0x0 -# IE64-RELA-NEXT: 0x30450 R_LARCH_TLS_DESC64 d 0x0 +# IE64-RELA-NEXT: 0x30408 R_LARCH_TLS_TPREL64 c 0x0 +# IE64-RELA-NEXT: 0x30410 R_LARCH_TLS_TPREL64 d 0x0 # IE64-RELA-NEXT: } # IE64-RELA: Hex dump of section '.got': -# IE64-RELA-NEXT: 0x00030430 00000000 00000000 00000000 00000000 . -# IE64-RELA-NEXT: 0x00030440 00000000 00000000 00000000 00000000 . -# IE64-RELA-NEXT: 0x00030450 00000000 00000000 00000000 00000000 . -# IE64-RELA-NEXT: 0x00030460 00000000 00000000 00000000 00000000 . +# IE64-RELA-NEXT: 0x00030408 00000000 00000000 00000000 00000000 . -# IE64: .got 00000040 0000000000030430 +# IE64: .got 00000010 0000000000030408 ## a and b are optimized to use LE. c and d are optimized to IE. -## &.got[a]-. 
= 0x30430 - 0x202f8 = 16462<<2 -# IE64: 202f8: pcaddi $a0, 16462 -# IE64-NEXT: ld.d $ra, $a0, 0 -# IE64-NEXT: jirl $ra, $ra, 0 +## a@tprel = 0x8 +# IE64: 202c8: nop +# IE64-NEXT: R_LARCH_TLS_DESC_PC_HI20 a +# IE64-NEXT: R_LARCH_RELAX *ABS* +# IE64-NEXT: nop +# IE64-NEXT: R_LARCH_TLS_DESC_PC_LO12 a +# IE64-NEXT: R_LARCH_RELAX *ABS* +# IE64-NEXT: nop +# IE64-NEXT: R_LARCH_TLS_DESC_LD a +# IE64-NEXT: R_LARCH_RELAX *ABS* +# IE64-NEXT: ori $a0, $zero, 8 +# IE64-NEXT: R_LARCH_TLS_DESC_CALL a +# IE64-NEXT: R_LARCH_RELAX *ABS* # IE64-NEXT: add.d $a1, $a0, $tp -## &.got[b]-. = 0x30430+48 - 0x20308: 0x10 pages, page offset 0x460 -## R_LARCH_RELAX does not appear in pairs. No relaxation. -# IE64: 20308: pcalau12i $a0, 16 -# IE64-NEXT: addi.d $a0, $a0, 1120 -# IE64-NEXT: ld.d $ra, $a0, 0 -# IE64-NEXT: jirl $ra, $ra, 0 +## b@tprel = 0x7ff +# IE64: 202dc: nop +# IE64-NEXT: R_LARCH_TLS_DESC_PC_HI20 b +# IE64-NEXT: R_LARCH_RELAX *ABS* +# IE64-NEXT: nop +# IE64-NEXT: R_LARCH_TLS_DESC_PC_LO12 b +# IE64-NEXT: nop +# IE64-NEXT: R_LARCH_TLS_DESC_LD b +# IE64-NEXT: ori $a0, $zero, 2047 +# IE64-NEXT: R_LARCH_TLS_DESC_CALL b # IE64-NEXT: add.d $a2, $a0, $tp -## &.got[c]-. = 0x30430+16 - 0x2031c: 0x10 pages, page offset 0x440 +## &.got[c]-. = 0x30408 - 0x20300: 0x10 pages, page offset 0x408 ## Without R_LARCH_RELAX relocation. No relaxation. -# IE64: 2031c: pcalau12i $a0, 16 +# IE64: 202f0: nop +# IE64-NEXT: R_LARCH_TLS_DESC_PC_HI20 c # IE64-NEXT: addi.d $t0, $zero, 0 -# IE64-NEXT: addi.d $a0, $a0, 1088 +# IE64-NEXT: nop +# IE64-NEXT: R_LARCH_TLS_DESC_PC_LO12 c # IE64-NEXT: addi.d $t0, $t0, 1 -# IE64-NEXT: ld.d $ra, $a0, 0 +# IE64-NEXT: pcalau12i $a0, 16 +# IE64-NEXT: R_LARCH_TLS_DESC_LD c # IE64-NEXT: addi.d $t0, $t0, 1 -# IE64-NEXT: jirl $ra, $ra, 0 +# IE64-NEXT: ld.d $a0, $a0, 1032 +# IE64-NEXT: R_LARCH_TLS_DESC_CALL c # IE64-NEXT: add.d $a3, $a0, $tp -## &.got[d]-. 
= 0x30430+32 - 0x2033c = 16453<<2 -# IE64: 2033c: pcaddi $a0, 16453 -# IE64-NEXT: ld.d $ra, $a0, 0 -# IE64-NEXT: jirl $ra, $ra, 0 +## &.got[d]-. = 0x30408+8 - 0x20318: 0x10 pages, page offset 0x410 +# IE64: 20310: nop +# IE64-NEXT: R_LARCH_TLS_DESC_PC_HI20 d +# IE64-NEXT: R_LARCH_RELAX *ABS* +# IE64-NEXT: nop +# IE64-NEXT: R_LARCH_TLS_DESC_PC_LO12 d +# IE64-NEXT: R_LARCH_RELAX *ABS* +# IE64-NEXT: pcalau12i $a0, 16 +# IE64-NEXT: R_LARCH_TLS_DESC_LD d +# IE64-NEXT: ld.d $a0, $a0, 1040 +# IE64-NEXT: R_LARCH_TLS_DESC_CALL d # IE64-NEXT: add.d $a4, $a0, $tp -# IE64-NORELAX: .got 00000040 0000000000030438 +# IE64-NORELAX: .got 00000010 0000000000030408 -## &.got[a]-. = 0x30438 - 0x202f8 = 0x10 pages, page offset 0x438 -# IE64-NORELAX: 202f8: pcalau12i $a0, 16 -# IE64-NORELAX-NEXT: addi.d $a0, $a0, 1080 -# IE64-NORELAX-NEXT: ld.d $ra, $a0, 0 -# IE64-NORELAX-NEXT: jirl $ra, $ra, 0 +## a@tprel = 0x8 +# IE64-NORELAX: 202c8: nop +# IE64-NORELAX-NEXT: nop +# IE64-NORELAX-NEXT: nop +# IE64-NORELAX-NEXT: ori $a0, $zero, 8 # IE64-NORELAX-NEXT: add.d $a1, $a0, $tp -## &.got[b]-. = 0x30438+48 - 0x2030c: 0x10 pages, page offset 0x468 -## R_LARCH_RELAX does not appear in pairs. No relaxation. -# IE64-NORELAX: 2030c: pcalau12i $a0, 16 -# IE64-NORELAX-NEXT: addi.d $a0, $a0, 1128 -# IE64-NORELAX-NEXT: ld.d $ra, $a0, 0 -# IE64-NORELAX-NEXT: jirl $ra, $ra, 0 +## b@tprel = 0x7ff +# IE64-NORELAX: 202dc: nop +# IE64-NORELAX-NEXT: nop +# IE64-NORELAX-NEXT: nop +# IE64-NORELAX-NEXT: ori $a0, $zero, 2047 # IE64-NORELAX-NEXT: add.d $a2, $a0, $tp -## &.got[c]-. = 0x30438+16 - 0x20320: 0x10 pages, page offset 0x448 +## &.got[c]-. = 0x30408 - 0x20300: 0x10 pages, page offset 0x408 ## Without R_LARCH_RELAX relocation. No relaxation. 
-# IE64-NORELAX: 20320: pcalau12i $a0, 16 +# IE64-NORELAX: 202f0: nop # IE64-NORELAX-NEXT: addi.d $t0, $zero, 0 -# IE64-NORELAX-NEXT: addi.d $a0, $a0, 1096 +# IE64-NORELAX-NEXT: nop # IE64-NORELAX-NEXT: addi.d $t0, $t0, 1 -# IE64-NORELAX-NEXT: ld.d $ra, $a0, 0 +# IE64-NORELAX-NEXT: pcalau12i $a0, 16 # IE64-NORELAX-NEXT: addi.d $t0, $t0, 1 -# IE64-NORELAX-NEXT: jirl $ra, $ra, 0 +# IE64-NORELAX-NEXT: ld.d $a0, $a0, 1032 # IE64-NORELAX-NEXT: add.d $a3, $a0, $tp -## &.got[d]-. = 0x30438+32 - 0x20340: 0x10 pages, page offset 0x458 -# IE64-NORELAX: 20340: pcalau12i $a0, 16 -# IE64-NORELAX-NEXT: addi.d $a0, $a0, 1112 -# IE64-NORELAX-NEXT: ld.d $ra, $a0, 0 -# IE64-NORELAX-NEXT: jirl $ra, $ra, 0 +## &.got[d]-. = 0x30408+8 - 0x20318: 0x10 pages, page offset 0x410 +# IE64-NORELAX: 20310: nop +# IE64-NORELAX-NEXT: nop +# IE64-NORELAX-NEXT: pcalau12i $a0, 16 +# IE64-NORELAX-NEXT: ld.d $a0, $a0, 1040 # IE64-NORELAX-NEXT: add.d $a4, $a0, $tp #--- a.s la.tls.desc $a0, a add.d $a1, $a0, $tp -# ADDI.D does not have R_LARCH_RELAX. No relaxation. +# ADDI.D does not have R_LARCH_RELAX. No relaxation when it is not optimized to IE/LE (--shared). pcalau12i $a0, %desc_pc_hi20(b) .reloc .-4, R_LARCH_RELAX, 0 addi.d $a0, $a0, %desc_pc_lo12(b) diff --git a/lld/test/ELF/loongarch-tlsdesc-pcrel20-s2.s b/lld/test/ELF/loongarch-tlsdesc-pcrel20-s2.s index 99e21d9935197..422592980d28f 100644 --- a/lld/test/ELF/loongarch-tlsdesc-pcrel20-s2.s +++ b/lld/test/ELF/loongarch-tlsdesc-pcrel20-s2.s @@ -14,14 +14,14 @@ # RUN: ld.lld -shared -z now a.64.o c.64.o -o rel.64.so -z rel # RUN: llvm-readobj -r -x .got rel.64.so | FileCheck --check-prefix=GD64-REL %s -## FIXME: The transition frome TLSDESC to IE/LE has not yet been implemented. -## Keep the dynamic relocations and hand them over to dynamic linker. - +## Transition from TLSDESC to IE/LE. 
# RUN: ld.lld -e 0 -z now a.64.o c.64.o -o a.64.le -# RUN: llvm-readobj -r -x .got a.64.le | FileCheck --check-prefix=LE64-RELA %s +# RUN: llvm-readobj -r -x .got a.64.le 2>&1 | FileCheck --check-prefix=LE64-RELA %s +# RUN: llvm-objdump --no-show-raw-insn -d a.64.le | FileCheck --check-prefix=LE64 %s # RUN: ld.lld -e 0 -z now a.64.o c.64.so -o a.64.ie # RUN: llvm-readobj -r -x .got a.64.ie | FileCheck --check-prefix=IE64-RELA %s +# RUN: llvm-objdump --no-show-raw-insn -d a.64.ie | FileCheck --check-prefix=IE64 %s ## 32-bit code is mostly the same. We only test a few variants. @@ -68,25 +68,46 @@ # GD64-NEXT: jirl $ra, $ra, 0 # GD64-NEXT: add.d $a3, $a0, $tp -# LE64-RELA: .rela.dyn { -# LE64-RELA-NEXT: 0x30240 R_LARCH_TLS_DESC64 - 0x8 -# LE64-RELA-NEXT: 0x30250 R_LARCH_TLS_DESC64 - 0x800 -# LE64-RELA-NEXT: 0x30260 R_LARCH_TLS_DESC64 - 0x7FF -# LE64-RELA-NEXT: } -# LE64-RELA: Hex dump of section '.got': -# LE64-RELA-NEXT: 0x00030240 00000000 00000000 00000000 00000000 . -# LE64-RELA-NEXT: 0x00030250 00000000 00000000 00000000 00000000 . -# LE64-RELA-NEXT: 0x00030260 00000000 00000000 00000000 00000000 . +# LE64-RELA: could not find section '.got' + +# LE64-LABEL: <.text>: +## st_value(a) = 8 +# LE64-NEXT: nop +# LE64-NEXT: nop +# LE64-NEXT: ori $a0, $zero, 8 +# LE64-NEXT: add.d $a1, $a0, $tp +## st_value(b) = 2047 +# LE64-NEXT: nop +# LE64-NEXT: nop +# LE64-NEXT: ori $a0, $zero, 2047 +# LE64-NEXT: add.d $a2, $a0, $tp +## st_value(c) = 2048 +# LE64-NEXT: nop +# LE64-NEXT: nop +# LE64-NEXT: ori $a0, $zero, 2048 +# LE64-NEXT: add.d $a3, $a0, $tp # IE64-RELA: .rela.dyn { -# IE64-RELA-NEXT: 0x303C8 R_LARCH_TLS_DESC64 - 0x8 -# IE64-RELA-NEXT: 0x303E8 R_LARCH_TLS_DESC64 - 0x7FF -# IE64-RELA-NEXT: 0x303D8 R_LARCH_TLS_DESC64 c 0x0 +# IE64-RELA-NEXT: 0x30398 R_LARCH_TLS_TPREL64 c 0x0 # IE64-RELA-NEXT: } # IE64-RELA: Hex dump of section '.got': -# IE64-RELA-NEXT: 0x000303c8 00000000 00000000 00000000 00000000 . -# IE64-RELA-NEXT: 0x000303d8 00000000 00000000 00000000 00000000 . 
-# IE64-RELA-NEXT: 0x000303e8 00000000 00000000 00000000 00000000 . +# IE64-RELA-NEXT: 0x00030398 00000000 00000000 . + +## a and b are optimized to use LE. c is optimized to IE. +# IE64-LABEL: <.text>: +# IE64-NEXT: nop +# IE64-NEXT: nop +# IE64-NEXT: ori $a0, $zero, 8 +# IE64-NEXT: add.d $a1, $a0, $tp +# IE64-NEXT: nop +# IE64-NEXT: nop +# IE64-NEXT: ori $a0, $zero, 2047 +# IE64-NEXT: add.d $a2, $a0, $tp +## &.got[c]-. = 0x30398 - 0x202ac: 0x10 pages, page offset 0x398 +# IE64-NEXT: nop +# IE64-NEXT: 202ac: pcalau12i $a0, 16 +# IE64-NEXT: ld.d $a0, $a0, 920 +# IE64-NEXT: add.d $a3, $a0, $tp # GD32-REL: .rel.dyn { # GD32-REL-NEXT: 0x20264 R_LARCH_TLS_DESC32 - diff --git a/lld/test/ELF/loongarch-tlsdesc.s b/lld/test/ELF/loongarch-tlsdesc.s index bf09b1e4bbae9..3dc31210d7ddb 100644 --- a/lld/test/ELF/loongarch-tlsdesc.s +++ b/lld/test/ELF/loongarch-tlsdesc.s @@ -14,14 +14,14 @@ # RUN: ld.lld -shared -z now a.64.o c.64.o -o rel.64.so -z rel # RUN: llvm-readobj -r -x .got rel.64.so | FileCheck --check-prefix=GD64-REL %s -## FIXME: The transition frome TLSDESC to IE/LE has not yet been implemented. -## Keep the dynamic relocations and hand them over to dynamic linker. - +## Transition from TLSDESC to IE/LE. # RUN: ld.lld -e 0 -z now a.64.o c.64.o -o a.64.le -# RUN: llvm-readobj -r -x .got a.64.le | FileCheck --check-prefix=LE64-RELA %s +# RUN: llvm-readobj -r -x .got a.64.le 2>&1 | FileCheck --check-prefix=LE64-RELA %s +# RUN: llvm-objdump --no-show-raw-insn -d a.64.le | FileCheck --check-prefix=LE64 %s # RUN: ld.lld -e 0 -z now a.64.o c.64.so -o a.64.ie # RUN: llvm-readobj -r -x .got a.64.ie | FileCheck --check-prefix=IE64-RELA %s +# RUN: llvm-objdump --no-show-raw-insn -d a.64.ie | FileCheck --check-prefix=IE64 %s ## 32-bit code is mostly the same. We only test a few variants. 
@@ -71,25 +71,52 @@ # GD64-NEXT: jirl $ra, $ra, 0 # GD64-NEXT: add.d $a3, $a0, $tp -# LE64-RELA: .rela.dyn { -# LE64-RELA-NEXT: 0x30250 R_LARCH_TLS_DESC64 - 0x8 -# LE64-RELA-NEXT: 0x30260 R_LARCH_TLS_DESC64 - 0x800 -# LE64-RELA-NEXT: 0x30270 R_LARCH_TLS_DESC64 - 0x7FF -# LE64-RELA-NEXT: } -# LE64-RELA: Hex dump of section '.got': -# LE64-RELA-NEXT: 0x00030250 00000000 00000000 00000000 00000000 . -# LE64-RELA-NEXT: 0x00030260 00000000 00000000 00000000 00000000 . -# LE64-RELA-NEXT: 0x00030270 00000000 00000000 00000000 00000000 . +# LE64-RELA: could not find section '.got' + +# LE64-LABEL: <.text>: +## st_value(a) = 8 +# LE64-NEXT: nop +# LE64-NEXT: nop +# LE64-NEXT: nop +# LE64-NEXT: ori $a0, $zero, 8 +# LE64-NEXT: add.d $a1, $a0, $tp +## st_value(b) = 2047 +# LE64-NEXT: nop +# LE64-NEXT: nop +# LE64-NEXT: nop +# LE64-NEXT: ori $a0, $zero, 2047 +# LE64-NEXT: add.d $a2, $a0, $tp +## st_value(c) = 2048 +# LE64-NEXT: nop +# LE64-NEXT: nop +# LE64-NEXT: nop +# LE64-NEXT: ori $a0, $zero, 2048 +# LE64-NEXT: add.d $a3, $a0, $tp # IE64-RELA: .rela.dyn { -# IE64-RELA-NEXT: 0x303D8 R_LARCH_TLS_DESC64 - 0x8 -# IE64-RELA-NEXT: 0x303F8 R_LARCH_TLS_DESC64 - 0x7FF -# IE64-RELA-NEXT: 0x303E8 R_LARCH_TLS_DESC64 c 0x0 +# IE64-RELA-NEXT: 0x303A8 R_LARCH_TLS_TPREL64 c 0x0 # IE64-RELA-NEXT: } # IE64-RELA: Hex dump of section '.got': -# IE64-RELA-NEXT: 0x000303d8 00000000 00000000 00000000 00000000 . -# IE64-RELA-NEXT: 0x000303e8 00000000 00000000 00000000 00000000 . -# IE64-RELA-NEXT: 0x000303f8 00000000 00000000 00000000 00000000 . +# IE64-RELA-NEXT: 0x000303a8 00000000 00000000 . + +## a and b are optimized to use LE. c is optimized to IE. +# IE64-LABEL: <.text>: +# IE64-NEXT: nop +# IE64-NEXT: nop +# IE64-NEXT: nop +# IE64-NEXT: ori $a0, $zero, 8 +# IE64-NEXT: add.d $a1, $a0, $tp +# IE64-NEXT: nop +# IE64-NEXT: nop +# IE64-NEXT: nop +# IE64-NEXT: ori $a0, $zero, 2047 +# IE64-NEXT: add.d $a2, $a0, $tp +## &.got[c]-. 
= 0x303a8 - 0x202b8: 0x10 pages, page offset 0x3a8 +# IE64-NEXT: nop +# IE64-NEXT: nop +# IE64-NEXT: 202b8: pcalau12i $a0, 16 +# IE64-NEXT: ld.d $a0, $a0, 936 +# IE64-NEXT: add.d $a3, $a0, $tp # GD32-REL: .rel.dyn { # GD32-REL-NEXT: 0x20270 R_LARCH_TLS_DESC32 - From baee006a50c14536f2e60ad4300458458ee67d6c Mon Sep 17 00:00:00 2001 From: yangzhaoxin Date: Wed, 22 Jan 2025 08:46:22 +0800 Subject: [PATCH 3/8] Delete the wrong flag NEEDS_TLSGD_TO_IE in the extreme code model. --- lld/ELF/Relocations.cpp | 12 +++++++++++- lld/ELF/Symbols.h | 3 +++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index 1203919c6fbd8..86f77fee18e24 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -1459,7 +1459,17 @@ unsigned RelocationScanner::handleTlsRelocation(RelExpr expr, RelType type, // label, so TLSDESC=>IE will be categorized as R_RELAX_TLS_GD_TO_LE. We fix // the categorization in RISCV::relocateAllosec-> if (sym.isPreemptible) { - sym.setFlags(NEEDS_TLSGD_TO_IE); + // In LoongArch, TLSDESC code sequences share relocations + // R_LARCH_TLS_DESC_PC_HI20 and R_LARCH_TLS_DESC_PC_LO12 in + // normal/medium/extreme code model. Since the extreme code model cannot + // be optimized to IE/LE, the flag NEEDS_TLSGD_TO_IE added previously + // needs to be cleared. + // In extreme code model, R_LARCH_TLS_DESC64_LO20 and + // R_LARCH_TLS_DESC64_HI12 will set NEEDS_TLSDESC flag. 
+ if (ctx.arg.emachine == EM_LOONGARCH && sym.hasFlag(NEEDS_TLSDESC)) + sym.clearFlags(NEEDS_TLSGD_TO_IE); + else + sym.setFlags(NEEDS_TLSGD_TO_IE); sec->addReloc({ctx.target->adjustTlsExpr(type, R_RELAX_TLS_GD_TO_IE), type, offset, addend, &sym}); } else { diff --git a/lld/ELF/Symbols.h b/lld/ELF/Symbols.h index 64f2f6eaa8d09..bd41eb0ef8892 100644 --- a/lld/ELF/Symbols.h +++ b/lld/ELF/Symbols.h @@ -342,6 +342,9 @@ class Symbol { void setFlags(uint16_t bits) { flags.fetch_or(bits, std::memory_order_relaxed); } + void clearFlags(uint16_t bits) { + flags.fetch_and(~bits, std::memory_order_relaxed); + } bool hasFlag(uint16_t bit) const { assert(bit && (bit & (bit - 1)) == 0 && "bit must be a power of 2"); return flags.load(std::memory_order_relaxed) & bit; From 9d85e62ce7c43a632eda39b9ac31b8be467515f3 Mon Sep 17 00:00:00 2001 From: yangzhaoxin Date: Sat, 4 Jan 2025 15:03:47 +0800 Subject: [PATCH 4/8] Support relaxation during TLSDESC GD/LD to IE/LE conversion. Complement https://. When relaxation is enabled, remove redundant NOPs. --- lld/ELF/Arch/LoongArch.cpp | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp index a8787e0caea95..0ee32ace8dba5 100644 --- a/lld/ELF/Arch/LoongArch.cpp +++ b/lld/ELF/Arch/LoongArch.cpp @@ -966,10 +966,16 @@ static bool relax(Ctx &ctx, InputSection &sec) { case R_LARCH_GOT_PC_HI20: case R_LARCH_TLS_GD_PC_HI20: case R_LARCH_TLS_LD_PC_HI20: - case R_LARCH_TLS_DESC_PC_HI20: // The overflow check for i+2 will be carried out in isPairRelaxable.
- if (r.expr != RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC && - r.expr != R_RELAX_TLS_GD_TO_LE && isPairRelaxable(relocs, i)) + if (isPairRelaxable(relocs, i)) + relaxPCHi20Lo12(ctx, sec, i, loc, r, relocs[i + 2], remove); + break; + case R_LARCH_TLS_DESC_PC_HI20: + if (r.expr == RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC || + r.expr == R_RELAX_TLS_GD_TO_LE) { + if (relaxable(relocs, i)) + remove = 4; + } else if (isPairRelaxable(relocs, i)) relaxPCHi20Lo12(ctx, sec, i, loc, r, relocs[i + 2], remove); break; case R_LARCH_CALL36: @@ -987,6 +993,17 @@ static bool relax(Ctx &ctx, InputSection &sec) { isUInt<12>(r.sym->getVA(ctx, r.addend))) remove = 4; break; + case R_LARCH_TLS_DESC_PC_LO12: + if (relaxable(relocs, i) && + (r.expr == RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC || + r.expr == R_RELAX_TLS_GD_TO_LE)) + remove = 4; + break; + case R_LARCH_TLS_DESC_LD: + if (relaxable(relocs, i) && r.expr == R_RELAX_TLS_GD_TO_LE && + isUInt<12>(r.sym->getVA(ctx, r.addend))) + remove = 4; + break; } // For all anchors whose offsets are <= r.offset, they are preceded by @@ -1215,6 +1232,10 @@ void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const { bits); relocateNoSym(loc, rel.type, val); } else { + isRelax = relaxable(relocs, i); + if (isRelax && (rel.type == R_LARCH_TLS_DESC_PC_HI20 || + rel.type == R_LARCH_TLS_DESC_PC_LO12)) + continue; tlsdescToIe(loc, rel, val); } continue; @@ -1231,6 +1252,11 @@ void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const { bits); relocateNoSym(loc, rel.type, val); } else { + isRelax = relaxable(relocs, i); + if (isRelax && (rel.type == R_LARCH_TLS_DESC_PC_HI20 || + rel.type == R_LARCH_TLS_DESC_PC_LO12 || + (rel.type == R_LARCH_TLS_DESC_LD && isUInt<12>(val)))) + continue; tlsdescToLe(loc, rel, val); } continue; From f74a55b34afc06d8a3f469400651e5ee30dc8f77 Mon Sep 17 00:00:00 2001 From: yangzhaoxin Date: Sat, 4 Jan 2025 15:30:55 +0800 Subject: [PATCH 5/8] Modify loongarch-relax-tlsdesc.s. 
--- lld/test/ELF/loongarch-relax-tlsdesc.s | 45 +++++++++----------------- 1 file changed, 16 insertions(+), 29 deletions(-) diff --git a/lld/test/ELF/loongarch-relax-tlsdesc.s b/lld/test/ELF/loongarch-relax-tlsdesc.s index 5f4368343471c..025cbc09fbdd8 100644 --- a/lld/test/ELF/loongarch-relax-tlsdesc.s +++ b/lld/test/ELF/loongarch-relax-tlsdesc.s @@ -9,7 +9,6 @@ # RUN: llvm-readobj -r -x .got a.64.so | FileCheck --check-prefix=GD64-RELA %s # RUN: llvm-objdump --no-show-raw-insn -dr -h a.64.so | FileCheck %s --check-prefix=GD64 -## FIXME: IE/LE relaxation have not yet been implemented, --relax/--no-relax obtain the same results. ## Transition from TLSDESC to IE/LE. Also check --emit-relocs. # RUN: ld.lld -e 0 -z now --emit-relocs a.64.o c.64.o -o a.64.le # RUN: llvm-readobj -r -x .got a.64.le 2>&1 | FileCheck --check-prefix=LE64-RELA %s @@ -73,25 +72,21 @@ # LE64-RELA: could not find section '.got' ## a@tprel = 0x8 -# LE64: 20158: nop +# LE64: 20158: ori $a0, $zero, 8 # LE64-NEXT: R_LARCH_TLS_DESC_PC_HI20 a # LE64-NEXT: R_LARCH_RELAX *ABS* -# LE64-NEXT: nop # LE64-NEXT: R_LARCH_TLS_DESC_PC_LO12 a # LE64-NEXT: R_LARCH_RELAX *ABS* -# LE64-NEXT: nop # LE64-NEXT: R_LARCH_TLS_DESC_LD a # LE64-NEXT: R_LARCH_RELAX *ABS* -# LE64-NEXT: ori $a0, $zero, 8 # LE64-NEXT: R_LARCH_TLS_DESC_CALL a # LE64-NEXT: R_LARCH_RELAX *ABS* # LE64-NEXT: add.d $a1, $a0, $tp ## b@tprel = 0x7ff -# LE64: 2016c: nop +# LE64: 20160: nop # LE64-NEXT: R_LARCH_TLS_DESC_PC_HI20 b # LE64-NEXT: R_LARCH_RELAX *ABS* -# LE64-NEXT: nop # LE64-NEXT: R_LARCH_TLS_DESC_PC_LO12 b # LE64-NEXT: nop # LE64-NEXT: R_LARCH_TLS_DESC_LD b @@ -101,7 +96,7 @@ ## c@tprel = 0x800 ## Without R_LARCH_RELAX relocation. No relaxation. 
-# LE64: 20180: nop +# LE64: 20170: nop # LE64-NEXT: R_LARCH_TLS_DESC_PC_HI20 c # LE64-NEXT: addi.d $t0, $zero, 0 # LE64-NEXT: nop @@ -115,13 +110,11 @@ # LE64-NEXT: add.d $a3, $a0, $tp ## d@tprel = 0x1000 -# LE64: 201a0: nop +# LE64: 20190: lu12i.w $a0, 1 # LE64-NEXT: R_LARCH_TLS_DESC_PC_HI20 d # LE64-NEXT: R_LARCH_RELAX *ABS* -# LE64-NEXT: nop # LE64-NEXT: R_LARCH_TLS_DESC_PC_LO12 d # LE64-NEXT: R_LARCH_RELAX *ABS* -# LE64-NEXT: lu12i.w $a0, 1 # LE64-NEXT: R_LARCH_TLS_DESC_LD d # LE64-NEXT: ori $a0, $a0, 0 # LE64-NEXT: R_LARCH_TLS_DESC_CALL d @@ -160,35 +153,31 @@ # LE64-NORELAX-NEXT: add.d $a4, $a0, $tp # IE64-RELA: .rela.dyn { -# IE64-RELA-NEXT: 0x30408 R_LARCH_TLS_TPREL64 c 0x0 -# IE64-RELA-NEXT: 0x30410 R_LARCH_TLS_TPREL64 d 0x0 +# IE64-RELA-NEXT: 0x303F0 R_LARCH_TLS_TPREL64 c 0x0 +# IE64-RELA-NEXT: 0x303F8 R_LARCH_TLS_TPREL64 d 0x0 # IE64-RELA-NEXT: } # IE64-RELA: Hex dump of section '.got': -# IE64-RELA-NEXT: 0x00030408 00000000 00000000 00000000 00000000 . +# IE64-RELA-NEXT: 0x000303f0 00000000 00000000 00000000 00000000 . -# IE64: .got 00000010 0000000000030408 +# IE64: .got 00000010 00000000000303f0 ## a and b are optimized to use LE. c and d are optimized to IE. ## a@tprel = 0x8 -# IE64: 202c8: nop +# IE64: 202c8: ori $a0, $zero, 8 # IE64-NEXT: R_LARCH_TLS_DESC_PC_HI20 a # IE64-NEXT: R_LARCH_RELAX *ABS* -# IE64-NEXT: nop # IE64-NEXT: R_LARCH_TLS_DESC_PC_LO12 a # IE64-NEXT: R_LARCH_RELAX *ABS* -# IE64-NEXT: nop # IE64-NEXT: R_LARCH_TLS_DESC_LD a # IE64-NEXT: R_LARCH_RELAX *ABS* -# IE64-NEXT: ori $a0, $zero, 8 # IE64-NEXT: R_LARCH_TLS_DESC_CALL a # IE64-NEXT: R_LARCH_RELAX *ABS* # IE64-NEXT: add.d $a1, $a0, $tp ## b@tprel = 0x7ff -# IE64: 202dc: nop +# IE64: 202d0: nop # IE64-NEXT: R_LARCH_TLS_DESC_PC_HI20 b # IE64-NEXT: R_LARCH_RELAX *ABS* -# IE64-NEXT: nop # IE64-NEXT: R_LARCH_TLS_DESC_PC_LO12 b # IE64-NEXT: nop # IE64-NEXT: R_LARCH_TLS_DESC_LD b @@ -196,9 +185,9 @@ # IE64-NEXT: R_LARCH_TLS_DESC_CALL b # IE64-NEXT: add.d $a2, $a0, $tp -## &.got[c]-. 
= 0x30408 - 0x20300: 0x10 pages, page offset 0x408 +## &.got[c]-. = 0x303f0 - 0x202f0: 0x10 pages, page offset 0x3f0 ## Without R_LARCH_RELAX relocation. No relaxation. -# IE64: 202f0: nop +# IE64: 202e0: nop # IE64-NEXT: R_LARCH_TLS_DESC_PC_HI20 c # IE64-NEXT: addi.d $t0, $zero, 0 # IE64-NEXT: nop @@ -207,20 +196,18 @@ # IE64-NEXT: pcalau12i $a0, 16 # IE64-NEXT: R_LARCH_TLS_DESC_LD c # IE64-NEXT: addi.d $t0, $t0, 1 -# IE64-NEXT: ld.d $a0, $a0, 1032 +# IE64-NEXT: ld.d $a0, $a0, 1008 # IE64-NEXT: R_LARCH_TLS_DESC_CALL c # IE64-NEXT: add.d $a3, $a0, $tp -## &.got[d]-. = 0x30408+8 - 0x20318: 0x10 pages, page offset 0x410 -# IE64: 20310: nop +## &.got[d]-. = 0x303f0+8 - 0x20300: 0x10 pages, page offset 0x3f8 +# IE64: 20300: pcalau12i $a0, 16 # IE64-NEXT: R_LARCH_TLS_DESC_PC_HI20 d # IE64-NEXT: R_LARCH_RELAX *ABS* -# IE64-NEXT: nop # IE64-NEXT: R_LARCH_TLS_DESC_PC_LO12 d # IE64-NEXT: R_LARCH_RELAX *ABS* -# IE64-NEXT: pcalau12i $a0, 16 # IE64-NEXT: R_LARCH_TLS_DESC_LD d -# IE64-NEXT: ld.d $a0, $a0, 1040 +# IE64-NEXT: ld.d $a0, $a0, 1016 # IE64-NEXT: R_LARCH_TLS_DESC_CALL d # IE64-NEXT: add.d $a4, $a0, $tp From ae23a3d336aaf825cb6b2b47cd79f4e9c0703c3b Mon Sep 17 00:00:00 2001 From: yangzhaoxin Date: Tue, 14 Jan 2025 15:50:49 +0800 Subject: [PATCH 6/8] [lld][LoongArch] GOT indirection to PC relative optimization. In LoongArch, this optimization is only supported when relaxation is enabled. From: * pcalau12i $a0, %got_pc_hi20(sym_got) * ld.w/d $a0, $a0, %got_pc_lo12(sym_got) To: * pcalau12i $a0, %pc_hi20(sym) * addi.w/d $a0, $a0, %pc_lo12(sym) If the original code sequence can be relaxed into a single instruction `pcaddi`, this patch will not be taken (see https://). The implementation related to `got` is split into two locations because the `relax()` function is part of an iteration fixed-point algorithm. We should minimize it to achieve better linker performance. FIXME: Although the optimization has been performed, the GOT entries still exist, similarly to AArch64.
Eliminating the entries may require additional marking in the common code. --- lld/ELF/Arch/LoongArch.cpp | 66 +++++++++++++++++++++ lld/test/ELF/loongarch-relax-pc-hi20-lo12.s | 10 ++-- 2 files changed, 72 insertions(+), 4 deletions(-) diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp index 0ee32ace8dba5..5716936d547c7 100644 --- a/lld/ELF/Arch/LoongArch.cpp +++ b/lld/ELF/Arch/LoongArch.cpp @@ -46,6 +46,8 @@ class LoongArch final : public TargetInfo { private: void tlsdescToIe(uint8_t *loc, const Relocation &rel, uint64_t val) const; void tlsdescToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const; + bool tryGotToPCRel(uint8_t *loc, const Relocation &rHi20, + const Relocation &rLo12, uint64_t secAddr) const; }; } // end anonymous namespace @@ -1151,6 +1153,54 @@ void LoongArch::tlsdescToLe(uint8_t *loc, const Relocation &rel, } } +// Try GOT indirection to PC relative optimization when relaxation is enabled. +// From: +// * pcalau12i $a0, %got_pc_hi20(sym_got) +// * ld.w/d $a0, $a0, %got_pc_lo12(sym_got) +// To: +// * pcalau12i $a0, %pc_hi20(sym) +// * addi.w/d $a0, $a0, %pc_lo12(sym) +// +// FIXME: Althouth the optimization has been performed, the GOT entries still +// exists, similarly to AArch64. Eliminating the entries may be require +// additional marking in the common code. +bool LoongArch::tryGotToPCRel(uint8_t *loc, const Relocation &rHi20, + const Relocation &rLo12, uint64_t secAddr) const { + if (!rHi20.sym->isDefined() || rHi20.sym->isPreemptible || + rHi20.sym->isGnuIFunc() || + (ctx.arg.isPic && !cast<Defined>(*rHi20.sym).section)) + return false; + + Symbol &sym = *rHi20.sym; + uint64_t symLocal = sym.getVA(ctx) + rHi20.addend; + // Check if the address difference is within +/-2GB range. + // For simplicity, the range mentioned here is an approximate estimate and is + // not fully equivalent to the entire region that PC-relative addressing can + // cover.
+ int64_t pageOffset = + getLoongArchPage(symLocal) - getLoongArchPage(secAddr + rHi20.offset); + if (!isInt<20>(pageOffset >> 12)) + return false; + + Relocation newRHi20 = {RE_LOONGARCH_PAGE_PC, R_LARCH_PCALA_HI20, rHi20.offset, + rHi20.addend, &sym}; + Relocation newRLo12 = {R_ABS, R_LARCH_PCALA_LO12, rLo12.offset, rLo12.addend, + &sym}; + + const uint32_t currInsn = read32le(loc); + const uint32_t nextInsn = read32le(loc + 4); + uint64_t pageDelta = + getLoongArchPageDelta(symLocal, secAddr + rHi20.offset, rHi20.type); + // pcalau12i $a0, %pc_hi20 + write32le(loc, insn(PCALAU12I, getD5(currInsn), 0, 0)); + relocate(loc, newRHi20, pageDelta); + // addi.w/d $a0, $a0, %pc_lo12 + write32le(loc + 4, insn(ctx.arg.is64 ? ADDI_D : ADDI_W, getD5(nextInsn), + getJ5(nextInsn), 0)); + relocate(loc + 4, newRLo12, SignExtend64(symLocal, 64)); + return true; +} + // During TLSDESC GD_TO_IE, the converted code sequence always includes an // instruction related to the Lo12 relocation (ld.[wd]). To obtain correct val // in `getRelocTargetVA`, expr of this instruction should be adjusted to @@ -1260,6 +1310,22 @@ void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const { tlsdescToLe(loc, rel, val); } continue; + case RE_LOONGARCH_GOT_PAGE_PC: + // In LoongArch, we try GOT indirection to PC relative optimization only + // when relaxation is enabled. This approach avoids determining whether + // relocation types are paired and whether the destination register of + // pcalau12i is only used by the immediately following instruction. + // Moreover, if the original code sequence can be relaxed to a single + // instruction `pcaddi`, the first instruction will be removed and it will + // not reach here. 
+ if (isPairRelaxable(relocs, i) && rel.type == R_LARCH_GOT_PC_HI20 && + relocs[i + 2].type == R_LARCH_GOT_PC_LO12 && + tryGotToPCRel(loc, rel, relocs[i + 2], secAddr)) { + i = i + 3; // skip relocations R_LARCH_RELAX, R_LARCH_GOT_PC_LO12, + // R_LARCH_RELAX + continue; + } + break; default: break; } diff --git a/lld/test/ELF/loongarch-relax-pc-hi20-lo12.s b/lld/test/ELF/loongarch-relax-pc-hi20-lo12.s index a417d89e9fa2e..c0bf6b3ba2811 100644 --- a/lld/test/ELF/loongarch-relax-pc-hi20-lo12.s +++ b/lld/test/ELF/loongarch-relax-pc-hi20-lo12.s @@ -30,24 +30,26 @@ ## offset = 0x410000 - 0x10000: 0x400 pages, page offset 0 # NORELAX32-NEXT: 10000: pcalau12i $a0, 1024 # NORELAX32-NEXT: addi.w $a0, $a0, 0 +## No relaxation; converted to PCRel. # NORELAX32-NEXT: pcalau12i $a0, 1024 -# NORELAX32-NEXT: ld.w $a0, $a0, 4 +# NORELAX32-NEXT: addi.w $a0, $a0, 0 # NORELAX32-NEXT: pcalau12i $a0, 1024 # NORELAX32-NEXT: addi.w $a0, $a0, 0 # NORELAX32-NEXT: pcalau12i $a0, 1024 -# NORELAX32-NEXT: ld.w $a0, $a0, 4 +# NORELAX32-NEXT: addi.w $a0, $a0, 0 # NORELAX64-LABEL: <_start>: ## offset exceed range of pcaddi ## offset = 0x410000 - 0x10000: 0x400 pages, page offset 0 # NORELAX64-NEXT: 10000: pcalau12i $a0, 1024 # NORELAX64-NEXT: addi.d $a0, $a0, 0 +## No relaxation; converted to PCRel. # NORELAX64-NEXT: pcalau12i $a0, 1024 -# NORELAX64-NEXT: ld.d $a0, $a0, 8 +# NORELAX64-NEXT: addi.d $a0, $a0, 0 # NORELAX64-NEXT: pcalau12i $a0, 1024 # NORELAX64-NEXT: addi.d $a0, $a0, 0 # NORELAX64-NEXT: pcalau12i $a0, 1024 -# NORELAX64-NEXT: ld.d $a0, $a0, 8 +# NORELAX64-NEXT: addi.d $a0, $a0, 0 .section .text .global _start From a2768a558ca995fe65a37582664984839e2b57a5 Mon Sep 17 00:00:00 2001 From: yangzhaoxin Date: Tue, 21 Jan 2025 09:09:23 +0800 Subject: [PATCH 7/8] Add check for register.
--- lld/ELF/Arch/LoongArch.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp index 5716936d547c7..226c0fbac567d 100644 --- a/lld/ELF/Arch/LoongArch.cpp +++ b/lld/ELF/Arch/LoongArch.cpp @@ -1189,6 +1189,10 @@ bool LoongArch::tryGotToPCRel(uint8_t *loc, const Relocation &rHi20, const uint32_t currInsn = read32le(loc); const uint32_t nextInsn = read32le(loc + 4); + // Check that both instructions use the same register. + if (getD5(currInsn) != getJ5(nextInsn) || getJ5(nextInsn) != getD5(nextInsn)) + return false; + uint64_t pageDelta = getLoongArchPageDelta(symLocal, secAddr + rHi20.offset, rHi20.type); // pcalau12i $a0, %pc_hi20 From 2d92dc3684b5b50be2ec69d7f5f6f151d7ff4e51 Mon Sep 17 00:00:00 2001 From: yangzhaoxin Date: Wed, 22 Jan 2025 14:02:31 +0800 Subject: [PATCH 8/8] Delete FIXME according to review. --- lld/ELF/Arch/LoongArch.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp index 226c0fbac567d..adf503732d547 100644 --- a/lld/ELF/Arch/LoongArch.cpp +++ b/lld/ELF/Arch/LoongArch.cpp @@ -1161,9 +1161,9 @@ void LoongArch::tlsdescToLe(uint8_t *loc, const Relocation &rel, // * pcalau12i $a0, %pc_hi20(sym) // * addi.w/d $a0, $a0, %pc_lo12(sym) // -// FIXME: Althouth the optimization has been performed, the GOT entries still -// exists, similarly to AArch64. Eliminating the entries may be require -// additional marking in the common code. +// Note: Although the optimization has been performed, the GOT entries still +// exist, similarly to AArch64. Eliminating the entries will increase code +// complexity. bool LoongArch::tryGotToPCRel(uint8_t *loc, const Relocation &rHi20, const Relocation &rLo12, uint64_t secAddr) const { if (!rHi20.sym->isDefined() || rHi20.sym->isPreemptible ||