diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 6529ab7a84a13..539aa35255450 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -13416,6 +13416,12 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
     return SDValue();
   uint64_t MulAmt = CNode->getZExtValue();
 
+  // WARNING: The code below is knowingly incorrect with regards to undef semantics.
+  // We're adding additional uses of X here, and in principle, we should be freezing
+  // X before doing so. However, adding freeze here causes real regressions, and no
+  // other target properly freezes X in these cases either.
+  SDValue X = N->getOperand(0);
+
   for (uint64_t Divisor : {3, 5, 9}) {
     if (MulAmt % Divisor != 0)
       continue;
@@ -13428,7 +13434,6 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
     // 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X)
     if (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9) {
       SDLoc DL(N);
-      SDValue X = DAG.getFreeze(N->getOperand(0));
       SDValue Mul359 =
           DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
                       DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
@@ -13446,7 +13451,6 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
     if (ScaleShift >= 1 && ScaleShift < 4) {
       unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1)));
       SDLoc DL(N);
-      SDValue X = DAG.getFreeze(N->getOperand(0));
       SDValue Shift1 =
           DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
       return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
@@ -13466,7 +13470,6 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
     unsigned TZ = llvm::countr_zero(C);
     if ((C >> TZ) == Divisor && (TZ == 1 || TZ == 2 || TZ == 3)) {
       SDLoc DL(N);
-      SDValue X = DAG.getFreeze(N->getOperand(0));
       SDValue Mul359 =
           DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
                       DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
@@ -13481,7 +13484,6 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
     if (ScaleShift >= 1 && ScaleShift < 4) {
       unsigned ShiftAmt = Log2_64(((MulAmt - 1) & (MulAmt - 2)));
       SDLoc DL(N);
-      SDValue X = DAG.getFreeze(N->getOperand(0));
       SDValue Shift1 =
           DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
       return DAG.getNode(ISD::ADD, DL, VT, Shift1,
@@ -13495,11 +13497,11 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
     if (isPowerOf2_64(MulAmt + Offset)) {
       SDLoc DL(N);
       SDValue Shift1 =
-          DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+          DAG.getNode(ISD::SHL, DL, VT, X,
                       DAG.getConstant(Log2_64(MulAmt + Offset), DL, VT));
-      SDValue Mul359 = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, N->getOperand(0),
+      SDValue Mul359 = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
                                    DAG.getConstant(Log2_64(Offset - 1), DL, VT),
-                                   N->getOperand(0));
+                                   X);
       return DAG.getNode(ISD::SUB, DL, VT, Shift1, Mul359);
     }
   }
diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll
index 0745b59c06cc8..817e2b7d0bd99 100644
--- a/llvm/test/CodeGen/RISCV/rv64zba.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zba.ll
@@ -4,7 +4,9 @@
 ; RUN: llc -mtriple=riscv64 -mattr=+m,+zba -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s -check-prefixes=CHECK,RV64ZBA,RV64ZBANOZBB
 ; RUN: llc -mtriple=riscv64 -mattr=+m,+zba,+zbb -verify-machineinstrs < %s \
-; RUN:   | FileCheck %s -check-prefixes=CHECK,RV64ZBA,RV64ZBAZBB
+; RUN:   | FileCheck %s -check-prefixes=CHECK,RV64ZBA,RV64ZBAZBB,RV64ZBAZBBNOZBS
+; RUN: llc -mtriple=riscv64 -mattr=+m,+zba,+zbb,+zbs -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefixes=CHECK,RV64ZBA,RV64ZBAZBB,RV64ZBAZBBZBS
 
 define i64 @slliuw(i64 %a) nounwind {
 ; RV64I-LABEL: slliuw:
@@ -2733,3 +2735,121 @@ define i64 @mul_neg8(i64 %a) {
   %c = mul i64 %a, -8
   ret i64 %c
 }
+
+define i64 @bext_mul12(i32 %1, i32 %2) {
+; RV64I-LABEL: bext_mul12:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    srlw a0, a0, a1
+; RV64I-NEXT:    andi a0, a0, 1
+; RV64I-NEXT:    li a1, 12
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBANOZBB-LABEL: bext_mul12:
+; RV64ZBANOZBB:       # %bb.0: # %entry
+; RV64ZBANOZBB-NEXT:    srlw a0, a0, a1
+; RV64ZBANOZBB-NEXT:    andi a0, a0, 1
+; RV64ZBANOZBB-NEXT:    sh1add a0, a0, a0
+; RV64ZBANOZBB-NEXT:    slli a0, a0, 2
+; RV64ZBANOZBB-NEXT:    ret
+;
+; RV64ZBAZBBNOZBS-LABEL: bext_mul12:
+; RV64ZBAZBBNOZBS:       # %bb.0: # %entry
+; RV64ZBAZBBNOZBS-NEXT:    srlw a0, a0, a1
+; RV64ZBAZBBNOZBS-NEXT:    andi a0, a0, 1
+; RV64ZBAZBBNOZBS-NEXT:    sh1add a0, a0, a0
+; RV64ZBAZBBNOZBS-NEXT:    slli a0, a0, 2
+; RV64ZBAZBBNOZBS-NEXT:    ret
+;
+; RV64ZBAZBBZBS-LABEL: bext_mul12:
+; RV64ZBAZBBZBS:       # %bb.0: # %entry
+; RV64ZBAZBBZBS-NEXT:    bext a0, a0, a1
+; RV64ZBAZBBZBS-NEXT:    sh1add a0, a0, a0
+; RV64ZBAZBBZBS-NEXT:    slli a0, a0, 2
+; RV64ZBAZBBZBS-NEXT:    ret
+entry:
+  %3 = lshr i32 %1, %2
+  %4 = and i32 %3, 1
+  %5 = zext nneg i32 %4 to i64
+  %6 = mul i64 %5, 12
+  ret i64 %6
+}
+
+define i64 @bext_mul45(i32 %1, i32 %2) {
+; RV64I-LABEL: bext_mul45:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    srlw a0, a0, a1
+; RV64I-NEXT:    andi a0, a0, 1
+; RV64I-NEXT:    li a1, 45
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBANOZBB-LABEL: bext_mul45:
+; RV64ZBANOZBB:       # %bb.0: # %entry
+; RV64ZBANOZBB-NEXT:    srlw a0, a0, a1
+; RV64ZBANOZBB-NEXT:    andi a0, a0, 1
+; RV64ZBANOZBB-NEXT:    sh2add a0, a0, a0
+; RV64ZBANOZBB-NEXT:    sh3add a0, a0, a0
+; RV64ZBANOZBB-NEXT:    ret
+;
+; RV64ZBAZBBNOZBS-LABEL: bext_mul45:
+; RV64ZBAZBBNOZBS:       # %bb.0: # %entry
+; RV64ZBAZBBNOZBS-NEXT:    srlw a0, a0, a1
+; RV64ZBAZBBNOZBS-NEXT:    andi a0, a0, 1
+; RV64ZBAZBBNOZBS-NEXT:    sh2add a0, a0, a0
+; RV64ZBAZBBNOZBS-NEXT:    sh3add a0, a0, a0
+; RV64ZBAZBBNOZBS-NEXT:    ret
+;
+; RV64ZBAZBBZBS-LABEL: bext_mul45:
+; RV64ZBAZBBZBS:       # %bb.0: # %entry
+; RV64ZBAZBBZBS-NEXT:    bext a0, a0, a1
+; RV64ZBAZBBZBS-NEXT:    sh2add a0, a0, a0
+; RV64ZBAZBBZBS-NEXT:    sh3add a0, a0, a0
+; RV64ZBAZBBZBS-NEXT:    ret
+entry:
+  %3 = lshr i32 %1, %2
+  %4 = and i32 %3, 1
+  %5 = zext nneg i32 %4 to i64
+  %6 = mul i64 %5, 45
+  ret i64 %6
+}
+
+define i64 @bext_mul132(i32 %1, i32 %2) {
+; RV64I-LABEL: bext_mul132:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    srlw a0, a0, a1
+; RV64I-NEXT:    andi a0, a0, 1
+; RV64I-NEXT:    li a1, 132
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBANOZBB-LABEL: bext_mul132:
+; RV64ZBANOZBB:       # %bb.0: # %entry
+; RV64ZBANOZBB-NEXT:    srlw a0, a0, a1
+; RV64ZBANOZBB-NEXT:    andi a0, a0, 1
+; RV64ZBANOZBB-NEXT:    slli a1, a0, 7
+; RV64ZBANOZBB-NEXT:    sh2add a0, a0, a1
+; RV64ZBANOZBB-NEXT:    ret
+;
+; RV64ZBAZBBNOZBS-LABEL: bext_mul132:
+; RV64ZBAZBBNOZBS:       # %bb.0: # %entry
+; RV64ZBAZBBNOZBS-NEXT:    srlw a0, a0, a1
+; RV64ZBAZBBNOZBS-NEXT:    andi a0, a0, 1
+; RV64ZBAZBBNOZBS-NEXT:    slli a1, a0, 7
+; RV64ZBAZBBNOZBS-NEXT:    sh2add a0, a0, a1
+; RV64ZBAZBBNOZBS-NEXT:    ret
+;
+; RV64ZBAZBBZBS-LABEL: bext_mul132:
+; RV64ZBAZBBZBS:       # %bb.0: # %entry
+; RV64ZBAZBBZBS-NEXT:    bext a0, a0, a1
+; RV64ZBAZBBZBS-NEXT:    slli a1, a0, 7
+; RV64ZBAZBBZBS-NEXT:    sh2add a0, a0, a1
+; RV64ZBAZBBZBS-NEXT:    ret
+entry:
+  %3 = lshr i32 %1, %2
+  %4 = and i32 %3, 1
+  %5 = zext nneg i32 %4 to i64
+  %6 = mul i64 %5, 132
+  ret i64 %6
+}
+
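
Note on the WARNING comment above, not part of the patch itself: the concern is that each use of an undef value may independently observe a different value, so an expansion that reuses X several times is only an unconditional refinement of `mul X, C` if X is frozen first. A minimal LLVM IR sketch of what the removed DAG.getFreeze calls corresponded to, using a hypothetical function name for illustration:

  define i64 @mul9_sketch(i64 %x) {
    %fx  = freeze i64 %x        ; pin a possibly-undef %x to one concrete value
    %shl = shl i64 %fx, 3       ; %fx * 8
    %res = add i64 %shl, %fx    ; %fx * 8 + %fx == %fx * 9 (the sh3add form)
    ret i64 %res
  }

The patch drops the freeze and reuses the unfrozen X directly, accepting the undef imprecision in exchange for avoiding the codegen regressions noted in the comment.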