[RISCV] Optimize (and (icmp x, 0, neq), (icmp y, 0, neq)) utilizing zicond extension #166469
Conversation
…Sequence
Will need the extra information for RISCV lowering. Will guide lowering choices in DAGCombine.

…ond extension

%1 = icmp eq x, 0
%2 = icmp eq y, 0
%3 = and %1, %2

Originally lowered to:

%1 = seqz x
%2 = seqz y
%3 = and %1, %2

With optimization:

%1 = seqz x
%3 = czero.nez %1, y
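For reference, the same pattern as a complete IR function, adapted from the icmp_and test in the diff below (a sketch: the test is only partially visible here, so the zext widening of the i1 result is an assumption):

define i32 @icmp_and(i64 %x, i64 %y) {
  %a = icmp ne i64 %y, 0
  %b = icmp ne i64 %x, 0
  %c = and i1 %b, %a      ; rewritten by the new combine into a select with a zero false arm
  %r = zext i1 %c to i32  ; assumed widening; the test's trailing lines are elided in the diff
  ret i32 %r
}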
@llvm/pr-subscribers-llvm-selectiondag
@llvm/pr-subscribers-backend-aarch64
@llvm/pr-subscribers-backend-risc-v

Author: Ryan Buchner (bababuck)

Changes

%1 = icmp eq x, 0
%2 = icmp eq y, 0
%3 = and %1, %2

Originally lowered to:

%1 = seqz x
%2 = seqz y
%3 = and %1, %2

With optimization:

%1 = seqz x
%3 = czero.nez %1, y

Full diff: https://github.com/llvm/llvm-project/pull/166469.diff

7 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 78f63b4406eb0..dac4dee97fbe5 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -2489,8 +2489,8 @@ class LLVM_ABI TargetLoweringBase {
/// select(N0|N1, X, Y) => select(N0, select(N1, X, Y, Y)) if it is likely
/// that it saves us from materializing N0 and N1 in an integer register.
/// Targets that are able to perform and/or on flags should return false here.
- virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context,
- EVT VT) const {
+ virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context, EVT VT,
+ SDNode *) const {
// If a target has multiple condition registers, then it likely has logical
// operations on those registers.
if (hasMultipleConditionRegisters(VT))
@@ -2502,6 +2502,10 @@ class LLVM_ABI TargetLoweringBase {
Action != TypeSplitVector;
}
+ // Return true if the target has a conditional zeroing instruction,
+ // i.e. select cond, x, 0.
+ virtual bool hasConditionalZero() const { return false; }
+
virtual bool isProfitableToCombineMinNumMaxNum(EVT VT) const { return true; }
/// Return true if a select of constants (select Cond, C1, C2) should be
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 46c4bb85a7420..8a9ed7a7eac46 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -12202,7 +12202,9 @@ static SDValue foldBoolSelectToLogic(SDNode *N, const SDLoc &DL,
// select Cond, T, Cond --> and Cond, freeze(T)
// select Cond, T, 0 --> and Cond, freeze(T)
- if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true))
+ // select Cond, T, 0 is a conditional zero
+ if (Cond == F || (!TLI.hasConditionalZero() &&
+ isNullOrNullSplat(F, /* AllowUndefs */ true)))
return matcher.getNode(ISD::AND, DL, VT, Cond, DAG.getFreeze(T));
// select Cond, T, 1 --> or (not Cond), freeze(T)
@@ -12213,7 +12215,9 @@ static SDValue foldBoolSelectToLogic(SDNode *N, const SDLoc &DL,
}
// select Cond, 0, F --> and (not Cond), freeze(F)
- if (isNullOrNullSplat(T, /* AllowUndefs */ true)) {
+ // select Cond, 0, F is a conditional zero
+ if (!TLI.hasConditionalZero() &&
+ isNullOrNullSplat(T, /* AllowUndefs */ true)) {
SDValue NotCond =
matcher.getNode(ISD::XOR, DL, VT, Cond, DAG.getAllOnesConstant(DL, VT));
return matcher.getNode(ISD::AND, DL, VT, NotCond, DAG.getFreeze(F));
@@ -12387,7 +12391,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
// and we always transform to the left side if we know that we can further
// optimize the combination of the conditions.
bool normalizeToSequence =
- TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
+ TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT, N);
// select (and Cond0, Cond1), X, Y
// -> select Cond0, (select Cond1, X, Y), Y
if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 60aa61e993b26..e1e9b37d6e5bf 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -29457,8 +29457,8 @@ bool AArch64TargetLowering::functionArgumentNeedsConsecutiveRegisters(
return all_equal(ValueVTs);
}
-bool AArch64TargetLowering::shouldNormalizeToSelectSequence(LLVMContext &,
- EVT) const {
+bool AArch64TargetLowering::shouldNormalizeToSelectSequence(LLVMContext &, EVT,
+ SDNode *) const {
return false;
}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 2cb8ed29f252a..52b1a6ab4c90d 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -867,7 +867,8 @@ class AArch64TargetLowering : public TargetLowering {
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const;
- bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override;
+ bool shouldNormalizeToSelectSequence(LLVMContext &, EVT,
+ SDNode *) const override;
void finalizeLowering(MachineFunction &MF) const override;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index e0cf739f67d9b..2d178586a0827 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -2763,6 +2763,51 @@ bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
(VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
}
+// Can the given operation be replaced with a Zicond CZERO operation?
+// It must be:
+// - a SETCC node
+// - comparing a value for [in]equality against 0
+static bool isCzeroCompatible(const SDValue Op) {
+ if (Op.getValueType() == MVT::i1 && Op.getOpcode() == ISD::SETCC &&
+ isNullConstant(Op.getOperand(1))) {
+ ISD::CondCode CondCode = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+ return CondCode == ISD::SETNE || CondCode == ISD::SETEQ;
+ }
+ return false;
+}
+
+// Disable normalizing for most cases:
+// select(N0&N1, X, Y) => select(N0, select(N1, X, Y), Y) and
+// select(N0|N1, X, Y) => select(N0, Y, select(N1, X, Y))
+// For select(N0, select(N1, X, Y), Y), if Y=0 and N0 = setcc [eq/ne] against 0:
+// %N1 = setcc [any_cond] %A, %B
+// %CZ = czero.eqz %X, %N1
+// %Res = czero.eqz %CZ, %N0
+// ...
+// But for select(N0&N1, X, Y):
+// %N0 = setcc [eq/ne] %C, 0
+// %N1 = setcc [any_cond] %A, %B
+// %And = and %N0, %N1
+// %Res = czero.eqz %X, %And
+bool RISCVTargetLowering::shouldNormalizeToSelectSequence(LLVMContext &, EVT VT,
+ SDNode *N) const {
+ if (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) {
+ assert(
+ N->getOpcode() == ISD::SELECT &&
+ "shouldNormalizeToSelectSequence() called with non-SELECT operation");
+ const SDValue &CondV = N->getOperand(0);
+ const SDValue &TrueV = N->getOperand(1);
+ const SDValue &FalseV = N->getOperand(2);
+ if (CondV.hasOneUse() && isCzeroCompatible(CondV) && isNullConstant(FalseV))
+ return true;
+ }
+ return false;
+}
+
+bool RISCVTargetLowering::hasConditionalZero() const {
+ return Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps();
+}
+
bool RISCVTargetLowering::isLegalElementTypeForRVV(EVT ScalarTy) const {
if (!ScalarTy.isSimple())
return false;
@@ -16117,6 +16162,25 @@ static SDValue reverseZExtICmpCombine(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res);
}
+static SDValue reduceANDOfSetCC(SDNode *N, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ if (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) {
+ // (and (i1) f, (setcc c, 0, ne)) -> (select c, f, 0) -> (czero.eqz f, c)
+ // (and (i1) f, (setcc c, 0, eq)) -> (select c, 0, f) -> (czero.nez f, c)
+ // (and (setcc c, 0, ne), (i1) g) -> (select c, g, 0) -> (czero.eqz g, c)
+ // (and (setcc c, 0, eq), (i1) g) -> (select c, 0, g) -> (czero.nez g, c)
+ const bool CzeroOp1 = isCzeroCompatible(N->getOperand(1));
+ if (CzeroOp1 || isCzeroCompatible(N->getOperand(0))) {
+ const SDValue I1Op = CzeroOp1 ? N->getOperand(0) : N->getOperand(1);
+ const SDValue SetCCOp = CzeroOp1 ? N->getOperand(1) : N->getOperand(0);
+ SDLoc DL(N);
+ return DAG.getNode(ISD::SELECT, DL, MVT::i1, SetCCOp, I1Op,
+ DAG.getConstant(0, DL, MVT::i1));
+ }
+ }
+ return SDValue();
+}
+
static SDValue reduceANDOfAtomicLoad(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
SelectionDAG &DAG = DCI.DAG;
@@ -16180,7 +16244,8 @@ static SDValue performANDCombine(SDNode *N,
if (SDValue V = reverseZExtICmpCombine(N, DAG, Subtarget))
return V;
-
+ if (SDValue V = reduceANDOfSetCC(N, DAG, Subtarget))
+ return V;
if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
return V;
if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 9e3e2a9443625..7a2eca41b4955 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -603,13 +603,10 @@ class RISCVTargetLowering : public TargetLowering {
/// this override can be removed.
bool mergeStoresAfterLegalization(EVT VT) const override;
- /// Disable normalizing
- /// select(N0&N1, X, Y) => select(N0, select(N1, X, Y), Y) and
- /// select(N0|N1, X, Y) => select(N0, select(N1, X, Y, Y))
- /// RISC-V doesn't have flags so it's better to perform the and/or in a GPR.
- bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override {
- return false;
- }
+ bool shouldNormalizeToSelectSequence(LLVMContext &, EVT VT,
+ SDNode *N) const override;
+
+ bool hasConditionalZero() const override;
/// Disables storing and loading vectors by default when there are function
/// calls between the load and store, since these are more expensive than just
diff --git a/llvm/test/CodeGen/RISCV/zicond-opts.ll b/llvm/test/CodeGen/RISCV/zicond-opts.ll
index d8e2b2c2bf58d..baa6bcb284098 100644
--- a/llvm/test/CodeGen/RISCV/zicond-opts.ll
+++ b/llvm/test/CodeGen/RISCV/zicond-opts.ll
@@ -8,16 +8,14 @@ define i32 @icmp_and(i64 %x, i64 %y) {
; RV32ZICOND: # %bb.0:
; RV32ZICOND-NEXT: or a2, a2, a3
; RV32ZICOND-NEXT: or a0, a0, a1
-; RV32ZICOND-NEXT: snez a1, a2
; RV32ZICOND-NEXT: snez a0, a0
-; RV32ZICOND-NEXT: and a0, a0, a1
+; RV32ZICOND-NEXT: czero.eqz a0, a0, a2
; RV32ZICOND-NEXT: ret
;
; RV64ZICOND-LABEL: icmp_and:
; RV64ZICOND: # %bb.0:
-; RV64ZICOND-NEXT: snez a1, a1
; RV64ZICOND-NEXT: snez a0, a0
-; RV64ZICOND-NEXT: and a0, a0, a1
+; RV64ZICOND-NEXT: czero.eqz a0, a0, a1
; RV64ZICOND-NEXT: ret
%3 = icmp ne i64 %y, 0
%4 = icmp ne i64 %x, 0
@@ -26,27 +24,50 @@ define i32 @icmp_and(i64 %x, i64 %y) {
ret i32 %6
}
+; (select (and (icmp x, 0, ne), (icmp y, 0, sgt)), z, 0) -> (czero.eqz z, (czero.eqz (icmp y, 0, sgt), x))
+define i32 @icmp_and_select(i64 %x, i64 %y, i32 %z) {
+; RV32ZICOND-LABEL: icmp_and_select:
+; RV32ZICOND: # %bb.0:
+; RV32ZICOND-NEXT: sgtz a5, a3
+; RV32ZICOND-NEXT: snez a2, a2
+; RV32ZICOND-NEXT: czero.eqz a5, a5, a3
+; RV32ZICOND-NEXT: czero.nez a2, a2, a3
+; RV32ZICOND-NEXT: or a2, a2, a5
+; RV32ZICOND-NEXT: or a0, a0, a1
+; RV32ZICOND-NEXT: czero.eqz a0, a2, a0
+; RV32ZICOND-NEXT: czero.eqz a0, a4, a0
+; RV32ZICOND-NEXT: ret
+;
+; RV64ZICOND-LABEL: icmp_and_select:
+; RV64ZICOND: # %bb.0:
+; RV64ZICOND-NEXT: sgtz a1, a1
+; RV64ZICOND-NEXT: czero.eqz a0, a1, a0
+; RV64ZICOND-NEXT: czero.eqz a0, a2, a0
+; RV64ZICOND-NEXT: ret
+ %3 = icmp sgt i64 %y, 0
+ %4 = icmp ne i64 %x, 0
+ %5 = and i1 %4, %3
+ %6 = select i1 %5, i32 %z, i32 0
+ ret i32 %6
+}
+
; (and (and (icmp x, 0, ne), (icmp y, 0, ne)), (icmp z, 0, ne)) -> (czero.eqz (czero.eqz (icmp x, 0, ne), y), z)
define i32 @icmp_and_and(i64 %x, i64 %y, i64 %z) {
; RV32ZICOND-LABEL: icmp_and_and:
; RV32ZICOND: # %bb.0:
; RV32ZICOND-NEXT: or a2, a2, a3
; RV32ZICOND-NEXT: or a0, a0, a1
-; RV32ZICOND-NEXT: or a4, a4, a5
; RV32ZICOND-NEXT: snez a1, a2
-; RV32ZICOND-NEXT: snez a0, a0
-; RV32ZICOND-NEXT: and a0, a1, a0
-; RV32ZICOND-NEXT: snez a1, a4
-; RV32ZICOND-NEXT: and a0, a1, a0
+; RV32ZICOND-NEXT: czero.eqz a0, a1, a0
+; RV32ZICOND-NEXT: or a4, a4, a5
+; RV32ZICOND-NEXT: czero.eqz a0, a0, a4
; RV32ZICOND-NEXT: ret
;
; RV64ZICOND-LABEL: icmp_and_and:
; RV64ZICOND: # %bb.0:
; RV64ZICOND-NEXT: snez a1, a1
-; RV64ZICOND-NEXT: snez a0, a0
-; RV64ZICOND-NEXT: and a0, a1, a0
-; RV64ZICOND-NEXT: snez a1, a2
-; RV64ZICOND-NEXT: and a0, a1, a0
+; RV64ZICOND-NEXT: czero.eqz a0, a1, a0
+; RV64ZICOND-NEXT: czero.eqz a0, a0, a2
; RV64ZICOND-NEXT: ret
%4 = icmp ne i64 %y, 0
%5 = icmp ne i64 %x, 0
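Conceptually, the reduceANDOfSetCC combine added above performs this rewrite (an IR-level sketch of the DAG transform; the actual code builds an ISD::SELECT over i1 operands, which the existing zicond lowering then matches to a czero instruction):

; before: and of an arbitrary i1 value %f with a czero-compatible compare
%c = icmp ne i64 %x, 0
%r = and i1 %f, %c

; after: a conditional zero, lowered as czero.eqz when the compare is ne
%r2 = select i1 %c, i1 %f, i1 false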
Posted this a while back when I was a little unsure how to proceed; reposting with a refined version. I am a little unsure if adding …
(and (icmp x, 0, eq), (icmp y, 0, eq)) can be done as (icmp eq (or x, y), 0), which is what InstCombine transforms it to: https://godbolt.org/z/Mq8z198eW
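In IR terms, the equivalence being referenced is (a sketch of the InstCombine canonicalization):

; and of two eq-against-zero compares
%a = icmp eq i64 %x, 0
%b = icmp eq i64 %y, 0
%r = and i1 %a, %b

; canonical form after InstCombine, equivalent to %r
%o = or i64 %x, %y
%s = icmp eq i64 %o, 0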
Do you think it is worthwhile to exclude it for this specific case? I wouldn't expect this patch to interfere with that existing behavior; I checked with:

./bin/clang -O3 -S -march=rv64gcbv_zicond_zvl512b c.c -o c.S
Thanks for the feedback! Addressed the above comments in the latest push.
I don't think we need to exclude it. Just update the title and the description to a more interesting case. Even llc seems to know the OR trick: https://godbolt.org/z/E1Mh3nzzf
Updated issue/description to the more interesting case:
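The description's before/after presumably shows the ne forms; a reconstruction from the updated title and the zicond-opts.ll output, not the verbatim text:

%1 = icmp ne x, 0
%2 = icmp ne y, 0
%3 = and %1, %2

Originally lowered to:

%1 = snez x
%2 = snez y
%3 = and %1, %2

With optimization:

%1 = snez x
%3 = czero.eqz %1, y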