diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index e0cf739f67d9b..fa73d8fb665d6 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -16117,6 +16117,46 @@ static SDValue reverseZExtICmpCombine(SDNode *N, SelectionDAG &DAG,
   return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res);
 }
 
+// (and (i1) f, (setcc c, 0, ne)) -> (czero.eqz f, c)
+// (and (i1) f, (setcc c, 0, eq)) -> (czero.nez f, c)
+// (and (setcc c, 0, ne), (i1) g) -> (czero.eqz g, c)
+// (and (setcc c, 0, eq), (i1) g) -> (czero.nez g, c)
+static SDValue combineANDOfSETCCToCZERO(SDNode *N, SelectionDAG &DAG,
+                                        const RISCVSubtarget &Subtarget) {
+  if (!Subtarget.hasCZEROLike())
+    return SDValue();
+
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+
+  auto IsEqualCompZero = [](SDValue &V) -> bool {
+    if (V.getOpcode() == ISD::SETCC && isNullConstant(V.getOperand(1))) {
+      ISD::CondCode CC = cast<CondCodeSDNode>(V.getOperand(2))->get();
+      if (ISD::isIntEqualitySetCC(CC))
+        return true;
+    }
+    return false;
+  };
+
+  if (!IsEqualCompZero(N0) || !N0.hasOneUse())
+    std::swap(N0, N1);
+  if (!IsEqualCompZero(N0) || !N0.hasOneUse())
+    return SDValue();
+
+  KnownBits Known = DAG.computeKnownBits(N1);
+  if (Known.getMaxValue().ugt(1))
+    return SDValue();
+
+  unsigned CzeroOpcode =
+      (cast<CondCodeSDNode>(N0.getOperand(2))->get() == ISD::SETNE)
+          ? RISCVISD::CZERO_EQZ
+          : RISCVISD::CZERO_NEZ;
+
+  EVT VT = N->getValueType(0);
+  SDLoc DL(N);
+  return DAG.getNode(CzeroOpcode, DL, VT, N1, N0.getOperand(0));
+}
+
 static SDValue reduceANDOfAtomicLoad(SDNode *N,
                                      TargetLowering::DAGCombinerInfo &DCI) {
   SelectionDAG &DAG = DCI.DAG;
@@ -16180,7 +16220,9 @@ static SDValue performANDCombine(SDNode *N,
 
   if (SDValue V = reverseZExtICmpCombine(N, DAG, Subtarget))
     return V;
-
+  if (DCI.isAfterLegalizeDAG())
+    if (SDValue V = combineANDOfSETCCToCZERO(N, DAG, Subtarget))
+      return V;
   if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
     return V;
   if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
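[Note, not part of the patch] The fold is sound because czero.eqz zeroes its value operand exactly when the condition register is zero, and czero.nez exactly when it is nonzero, so AND-ing a value that is provably 0 or 1 with an equality-against-zero setcc collapses to a single conditional-zero instruction. A minimal standalone sketch of that equivalence, where czero_eqz and czero_nez are hypothetical helpers modeling the instruction semantics, not LLVM APIs:

  // Models the equivalence combineANDOfSETCCToCZERO relies on; illustrative only.
  #include <cassert>
  #include <cstdint>
  #include <initializer_list>

  // czero.eqz rd, rs1, rs2: rd = (rs2 == 0) ? 0 : rs1
  static uint64_t czero_eqz(uint64_t rs1, uint64_t rs2) { return rs2 == 0 ? 0 : rs1; }
  // czero.nez rd, rs1, rs2: rd = (rs2 != 0) ? 0 : rs1
  static uint64_t czero_nez(uint64_t rs1, uint64_t rs2) { return rs2 != 0 ? 0 : rs1; }

  int main() {
    for (uint64_t f : {0, 1})          // f must be provably 0 or 1 (the KnownBits check)
      for (uint64_t c : {0, 1, 42}) {
        // (and f, (setcc c, 0, ne)) == (czero.eqz f, c)
        assert((f & uint64_t(c != 0)) == czero_eqz(f, c));
        // (and f, (setcc c, 0, eq)) == (czero.nez f, c)
        assert((f & uint64_t(c == 0)) == czero_nez(f, c));
      }
    return 0;
  }

The asserts hold only because f stays in {0, 1}, which is exactly the property the computeKnownBits guard in the combine enforces.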
diff --git a/llvm/test/CodeGen/RISCV/xaluo.ll b/llvm/test/CodeGen/RISCV/xaluo.ll
index bf6802deeffdc..93b68b0a95b48 100644
--- a/llvm/test/CodeGen/RISCV/xaluo.ll
+++ b/llvm/test/CodeGen/RISCV/xaluo.ll
@@ -1834,13 +1834,12 @@ define zeroext i1 @umulo.i64(i64 %v1, i64 %v2, ptr %res) {
 ; RV32ZICOND-NEXT:    mul a5, a3, a0
 ; RV32ZICOND-NEXT:    mul a6, a1, a2
 ; RV32ZICOND-NEXT:    mulhu a7, a0, a2
-; RV32ZICOND-NEXT:    snez t0, a3
+; RV32ZICOND-NEXT:    add a5, a6, a5
+; RV32ZICOND-NEXT:    snez a6, a3
 ; RV32ZICOND-NEXT:    mulhu a3, a3, a0
-; RV32ZICOND-NEXT:    mul t1, a0, a2
+; RV32ZICOND-NEXT:    mul t0, a0, a2
 ; RV32ZICOND-NEXT:    mulhu a0, a1, a2
-; RV32ZICOND-NEXT:    snez a1, a1
-; RV32ZICOND-NEXT:    add a5, a6, a5
-; RV32ZICOND-NEXT:    and a1, a1, t0
+; RV32ZICOND-NEXT:    czero.eqz a1, a6, a1
 ; RV32ZICOND-NEXT:    snez a0, a0
 ; RV32ZICOND-NEXT:    snez a2, a3
 ; RV32ZICOND-NEXT:    add a5, a7, a5
@@ -1848,7 +1847,7 @@ define zeroext i1 @umulo.i64(i64 %v1, i64 %v2, ptr %res) {
 ; RV32ZICOND-NEXT:    sltu a1, a5, a7
 ; RV32ZICOND-NEXT:    or a0, a0, a2
 ; RV32ZICOND-NEXT:    or a0, a0, a1
-; RV32ZICOND-NEXT:    sw t1, 0(a4)
+; RV32ZICOND-NEXT:    sw t0, 0(a4)
 ; RV32ZICOND-NEXT:    sw a5, 4(a4)
 ; RV32ZICOND-NEXT:    ret
 ;
@@ -3690,11 +3689,10 @@ define i64 @umulo.select.i64(i64 %v1, i64 %v2) {
 ; RV32ZICOND-NEXT:    mul a5, a1, a2
 ; RV32ZICOND-NEXT:    snez a6, a3
 ; RV32ZICOND-NEXT:    add a4, a5, a4
-; RV32ZICOND-NEXT:    snez a5, a1
-; RV32ZICOND-NEXT:    and a5, a5, a6
-; RV32ZICOND-NEXT:    mulhu a6, a1, a2
-; RV32ZICOND-NEXT:    snez a6, a6
-; RV32ZICOND-NEXT:    or a5, a5, a6
+; RV32ZICOND-NEXT:    mulhu a5, a1, a2
+; RV32ZICOND-NEXT:    czero.eqz a6, a6, a1
+; RV32ZICOND-NEXT:    snez a5, a5
+; RV32ZICOND-NEXT:    or a5, a6, a5
 ; RV32ZICOND-NEXT:    mulhu a6, a0, a2
 ; RV32ZICOND-NEXT:    add a4, a6, a4
 ; RV32ZICOND-NEXT:    sltu a4, a4, a6
@@ -3783,18 +3781,17 @@ define i1 @umulo.not.i64(i64 %v1, i64 %v2) {
 ; RV32ZICOND:       # %bb.0: # %entry
 ; RV32ZICOND-NEXT:    mul a4, a3, a0
 ; RV32ZICOND-NEXT:    mul a5, a1, a2
-; RV32ZICOND-NEXT:    mulhu a6, a0, a2
+; RV32ZICOND-NEXT:    add a4, a5, a4
+; RV32ZICOND-NEXT:    mulhu a5, a0, a2
 ; RV32ZICOND-NEXT:    mulhu a0, a3, a0
 ; RV32ZICOND-NEXT:    snez a3, a3
 ; RV32ZICOND-NEXT:    mulhu a2, a1, a2
-; RV32ZICOND-NEXT:    snez a1, a1
-; RV32ZICOND-NEXT:    add a4, a5, a4
-; RV32ZICOND-NEXT:    and a1, a1, a3
+; RV32ZICOND-NEXT:    czero.eqz a1, a3, a1
 ; RV32ZICOND-NEXT:    snez a2, a2
 ; RV32ZICOND-NEXT:    snez a0, a0
-; RV32ZICOND-NEXT:    add a4, a6, a4
+; RV32ZICOND-NEXT:    add a4, a5, a4
 ; RV32ZICOND-NEXT:    or a1, a1, a2
-; RV32ZICOND-NEXT:    sltu a2, a4, a6
+; RV32ZICOND-NEXT:    sltu a2, a4, a5
 ; RV32ZICOND-NEXT:    or a0, a1, a0
 ; RV32ZICOND-NEXT:    or a0, a0, a2
 ; RV32ZICOND-NEXT:    xori a0, a0, 1
@@ -5156,18 +5153,17 @@ define zeroext i1 @umulo.br.i64(i64 %v1, i64 %v2) {
 ; RV32ZICOND:       # %bb.0: # %entry
 ; RV32ZICOND-NEXT:    mul a4, a3, a0
 ; RV32ZICOND-NEXT:    mul a5, a1, a2
-; RV32ZICOND-NEXT:    mulhu a6, a0, a2
+; RV32ZICOND-NEXT:    add a4, a5, a4
+; RV32ZICOND-NEXT:    mulhu a5, a0, a2
 ; RV32ZICOND-NEXT:    mulhu a0, a3, a0
 ; RV32ZICOND-NEXT:    snez a3, a3
 ; RV32ZICOND-NEXT:    mulhu a2, a1, a2
-; RV32ZICOND-NEXT:    snez a1, a1
-; RV32ZICOND-NEXT:    add a4, a5, a4
-; RV32ZICOND-NEXT:    and a1, a1, a3
+; RV32ZICOND-NEXT:    czero.eqz a1, a3, a1
 ; RV32ZICOND-NEXT:    snez a2, a2
 ; RV32ZICOND-NEXT:    snez a0, a0
-; RV32ZICOND-NEXT:    add a4, a6, a4
+; RV32ZICOND-NEXT:    add a4, a5, a4
 ; RV32ZICOND-NEXT:    or a1, a1, a2
-; RV32ZICOND-NEXT:    sltu a2, a4, a6
+; RV32ZICOND-NEXT:    sltu a2, a4, a5
 ; RV32ZICOND-NEXT:    or a0, a1, a0
 ; RV32ZICOND-NEXT:    or a0, a0, a2
 ; RV32ZICOND-NEXT:    beqz a0, .LBB64_2
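[Note, not part of the patch] One more observation before the zicond-opts.ll tests: the Known.getMaxValue().ugt(1) bailout in combineANDOfSETCCToCZERO is load-bearing. After type legalization the AND operates on XLen-wide registers, so the "(i1) f" in the pattern comment has to be proven by value analysis rather than read off the type. A small counterexample sketch under the same C++ model as above:

  // Why the combine refuses operands that may exceed 1; illustrative only.
  #include <cstdint>
  #include <cstdio>

  int main() {
    uint64_t f = 2, c = 1;                     // f is boolean in intent, but not 0/1 in value
    uint64_t andResult = f & uint64_t(c != 0); // (and f, (setcc c, 0, ne)) = 2 & 1 = 0
    uint64_t czeroResult = c == 0 ? 0 : f;     // (czero.eqz f, c) = 2
    std::printf("and = %llu, czero.eqz = %llu\n",
                (unsigned long long)andResult, (unsigned long long)czeroResult);
    return andResult == czeroResult ? 0 : 1;   // exits 1: the fold would be wrong for this f
  }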
diff --git a/llvm/test/CodeGen/RISCV/zicond-opts.ll b/llvm/test/CodeGen/RISCV/zicond-opts.ll
index d8e2b2c2bf58d..6db90d8698dd3 100644
--- a/llvm/test/CodeGen/RISCV/zicond-opts.ll
+++ b/llvm/test/CodeGen/RISCV/zicond-opts.ll
@@ -7,22 +7,132 @@ define i32 @icmp_and(i64 %x, i64 %y) {
 ; RV32ZICOND-LABEL: icmp_and:
 ; RV32ZICOND:       # %bb.0:
 ; RV32ZICOND-NEXT:    or a2, a2, a3
+; RV32ZICOND-NEXT:    snez a2, a2
+; RV32ZICOND-NEXT:    or a0, a0, a1
+; RV32ZICOND-NEXT:    czero.eqz a0, a2, a0
+; RV32ZICOND-NEXT:    ret
+;
+; RV64ZICOND-LABEL: icmp_and:
+; RV64ZICOND:       # %bb.0:
+; RV64ZICOND-NEXT:    snez a1, a1
+; RV64ZICOND-NEXT:    czero.eqz a0, a1, a0
+; RV64ZICOND-NEXT:    ret
+  %3 = icmp ne i64 %y, 0
+  %4 = icmp ne i64 %x, 0
+  %5 = and i1 %4, %3
+  %6 = zext i1 %5 to i32
+  ret i32 %6
+}
+
+; Make sure we choose to replace the single-use icmp
+define i32 @icmp_and_x_multiple_uses(i64 %x, i64 %y) {
+; RV32ZICOND-LABEL: icmp_and_x_multiple_uses:
+; RV32ZICOND:       # %bb.0:
+; RV32ZICOND-NEXT:    or a2, a2, a3
+; RV32ZICOND-NEXT:    or a0, a0, a1
+; RV32ZICOND-NEXT:    snez a0, a0
+; RV32ZICOND-NEXT:    czero.eqz a1, a0, a2
+; RV32ZICOND-NEXT:    add a0, a1, a0
+; RV32ZICOND-NEXT:    ret
+;
+; RV64ZICOND-LABEL: icmp_and_x_multiple_uses:
+; RV64ZICOND:       # %bb.0:
+; RV64ZICOND-NEXT:    snez a0, a0
+; RV64ZICOND-NEXT:    czero.eqz a1, a0, a1
+; RV64ZICOND-NEXT:    add a0, a1, a0
+; RV64ZICOND-NEXT:    ret
+  %3 = icmp ne i64 %y, 0
+  %4 = icmp ne i64 %x, 0
+  %5 = and i1 %4, %3
+  %6 = zext i1 %5 to i32
+  %7 = zext i1 %4 to i32
+  %8 = add i32 %6, %7
+  ret i32 %8
+}
+
+; Make sure we choose to replace the single-use icmp
+define i32 @icmp_and_y_multiple_uses(i64 %x, i64 %y) {
+; RV32ZICOND-LABEL: icmp_and_y_multiple_uses:
+; RV32ZICOND:       # %bb.0:
+; RV32ZICOND-NEXT:    or a2, a2, a3
+; RV32ZICOND-NEXT:    snez a2, a2
+; RV32ZICOND-NEXT:    or a0, a0, a1
+; RV32ZICOND-NEXT:    czero.eqz a0, a2, a0
+; RV32ZICOND-NEXT:    add a0, a0, a2
+; RV32ZICOND-NEXT:    ret
+;
+; RV64ZICOND-LABEL: icmp_and_y_multiple_uses:
+; RV64ZICOND:       # %bb.0:
+; RV64ZICOND-NEXT:    snez a1, a1
+; RV64ZICOND-NEXT:    czero.eqz a0, a1, a0
+; RV64ZICOND-NEXT:    add a0, a0, a1
+; RV64ZICOND-NEXT:    ret
+  %3 = icmp ne i64 %y, 0
+  %4 = icmp ne i64 %x, 0
+  %5 = and i1 %4, %3
+  %6 = zext i1 %5 to i32
+  %7 = zext i1 %3 to i32
+  %8 = add i32 %6, %7
+  ret i32 %8
+}
+
+; Both icmps have multiple uses, don't optimize
+define i32 @icmp_and_xy_multiple_uses(i64 %x, i64 %y) {
+; RV32ZICOND-LABEL: icmp_and_xy_multiple_uses:
+; RV32ZICOND:       # %bb.0:
+; RV32ZICOND-NEXT:    or a2, a2, a3
 ; RV32ZICOND-NEXT:    or a0, a0, a1
 ; RV32ZICOND-NEXT:    snez a1, a2
 ; RV32ZICOND-NEXT:    snez a0, a0
-; RV32ZICOND-NEXT:    and a0, a0, a1
+; RV32ZICOND-NEXT:    and a2, a0, a1
+; RV32ZICOND-NEXT:    add a0, a1, a0
+; RV32ZICOND-NEXT:    add a0, a2, a0
 ; RV32ZICOND-NEXT:    ret
 ;
-; RV64ZICOND-LABEL: icmp_and:
+; RV64ZICOND-LABEL: icmp_and_xy_multiple_uses:
 ; RV64ZICOND:       # %bb.0:
 ; RV64ZICOND-NEXT:    snez a1, a1
 ; RV64ZICOND-NEXT:    snez a0, a0
-; RV64ZICOND-NEXT:    and a0, a0, a1
+; RV64ZICOND-NEXT:    and a2, a0, a1
+; RV64ZICOND-NEXT:    add a0, a1, a0
+; RV64ZICOND-NEXT:    add a0, a2, a0
 ; RV64ZICOND-NEXT:    ret
   %3 = icmp ne i64 %y, 0
   %4 = icmp ne i64 %x, 0
   %5 = and i1 %4, %3
   %6 = zext i1 %5 to i32
+  %7 = zext i1 %3 to i32
+  %8 = zext i1 %4 to i32
+  %9 = add i32 %6, %7
+  %10 = add i32 %9, %8
+  ret i32 %10
+}
+
+
+; (and (icmp x, 0, ne), (icmp y, 0, ne)) -> (czero.eqz (icmp x, 0, ne), y)
+define i32 @icmp_and_select(i64 %x, i64 %y, i32 %z) {
+; RV32ZICOND-LABEL: icmp_and_select:
+; RV32ZICOND:       # %bb.0:
+; RV32ZICOND-NEXT:    sgtz a5, a3
+; RV32ZICOND-NEXT:    snez a2, a2
+; RV32ZICOND-NEXT:    czero.eqz a5, a5, a3
+; RV32ZICOND-NEXT:    czero.nez a2, a2, a3
+; RV32ZICOND-NEXT:    or a2, a2, a5
+; RV32ZICOND-NEXT:    or a0, a0, a1
+; RV32ZICOND-NEXT:    czero.eqz a0, a2, a0
+; RV32ZICOND-NEXT:    czero.eqz a0, a4, a0
+; RV32ZICOND-NEXT:    ret
+;
+; RV64ZICOND-LABEL: icmp_and_select:
+; RV64ZICOND:       # %bb.0:
+; RV64ZICOND-NEXT:    sgtz a1, a1
+; RV64ZICOND-NEXT:    czero.eqz a0, a1, a0
+; RV64ZICOND-NEXT:    czero.eqz a0, a2, a0
+; RV64ZICOND-NEXT:    ret
+  %3 = icmp sgt i64 %y, 0
+  %4 = icmp ne i64 %x, 0
+  %5 = and i1 %4, %3
+  %6 = select i1 %5, i32 %z, i32 0
   ret i32 %6
 }
 
@@ -32,21 +142,17 @@ define i32 @icmp_and_and(i64 %x, i64 %y, i64 %z) {
 ; RV32ZICOND:       # %bb.0:
 ; RV32ZICOND-NEXT:    or a2, a2, a3
 ; RV32ZICOND-NEXT:    or a0, a0, a1
-; RV32ZICOND-NEXT:    or a4, a4, a5
-; RV32ZICOND-NEXT:    snez a1, a2
 ; RV32ZICOND-NEXT:    snez a0, a0
-; RV32ZICOND-NEXT:    and a0, a1, a0
-; RV32ZICOND-NEXT:    snez a1, a4
-; RV32ZICOND-NEXT:    and a0, a1, a0
+; RV32ZICOND-NEXT:    czero.eqz a0, a0, a2
+; RV32ZICOND-NEXT:    or a4, a4, a5
+; RV32ZICOND-NEXT:    czero.eqz a0, a0, a4
 ; RV32ZICOND-NEXT:    ret
 ;
 ; RV64ZICOND-LABEL: icmp_and_and:
 ; RV64ZICOND:       # %bb.0:
-; RV64ZICOND-NEXT:    snez a1, a1
 ; RV64ZICOND-NEXT:    snez a0, a0
-; RV64ZICOND-NEXT:    and a0, a1, a0
-; RV64ZICOND-NEXT:    snez a1, a2
-; RV64ZICOND-NEXT:    and a0, a1, a0
+; RV64ZICOND-NEXT:    czero.eqz a0, a0, a1
+; RV64ZICOND-NEXT:    czero.eqz a0, a0, a2
 ; RV64ZICOND-NEXT:    ret
   %4 = icmp ne i64 %y, 0
   %5 = icmp ne i64 %x, 0
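[Note, not part of the patch] For reference, source along the following lines is the kind of input that produces the icmp_and pattern tested above; on a Zicond-enabled target (for example -march=rv64gc_zicond) the AND of the two compares should now lower to snez + czero.eqz rather than snez + snez + and:

  // Illustrative C++ reproducer for the icmp_and test; not from the patch.
  extern "C" int both_nonzero(long long x, long long y) {
    return (x != 0) & (y != 0); // (and (setcc x, 0, ne), (setcc y, 0, ne))
  }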