Skip to content

Commit 856ef96

Browse files
authored
[RISCV] Optimize (and (icmp x, 0, neq), (icmp y, 0, neq)) utilizing zicond extension
PR #166469 ``` %1 = icmp x, 0, neq %2 = icmp y, 0, neq %3 = and %1, %2 ``` Originally lowered to: ``` %1 = snez x %2 = snez y %3 = and %1, %2 ``` With optimization: ``` %1 = snez x %3 = czero.eqz %1, y ```
1 parent a7bf45a commit 856ef96

File tree

3 files changed

+180
-36
lines changed

3 files changed

+180
-36
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16117,6 +16117,46 @@ static SDValue reverseZExtICmpCombine(SDNode *N, SelectionDAG &DAG,
1611716117
return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res);
1611816118
}
1611916119

16120+
// Fold an AND of a 0/1 value with a single-use compare-against-zero into a
// conditional-zero node, so the compare itself need not be materialized:
// (and (i1) f, (setcc c, 0, ne)) -> (czero.eqz f, c)
16121+
// (and (i1) f, (setcc c, 0, eq)) -> (czero.nez f, c)
16122+
// (and (setcc c, 0, ne), (i1) g) -> (czero.eqz g, c)
16123+
// (and (setcc c, 0, eq), (i1) g) -> (czero.nez g, c)
// Returns the replacement node, or an empty SDValue when the fold does not
// apply.
16124+
static SDValue combineANDOfSETCCToCZERO(SDNode *N, SelectionDAG &DAG,
16125+
const RISCVSubtarget &Subtarget) {
16126+
// Nothing to do unless the subtarget reports czero-like instructions.
if (!Subtarget.hasCZEROLike())
16127+
return SDValue();
16128+
16129+
SDValue N0 = N->getOperand(0);
16130+
SDValue N1 = N->getOperand(1);
16131+
16132+
// True when V is a SETCC whose second operand is the constant zero and whose
// condition code is accepted by ISD::isIntEqualitySetCC.
auto IsEqualCompZero = [](SDValue &V) -> bool {
16133+
if (V.getOpcode() == ISD::SETCC && isNullConstant(V.getOperand(1))) {
16134+
ISD::CondCode CC = cast<CondCodeSDNode>(V.getOperand(2))->get();
16135+
if (ISD::isIntEqualitySetCC(CC))
16136+
return true;
16137+
}
16138+
return false;
16139+
};
16140+
16141+
// Canonicalize so that N0 is the single-use compare-with-zero being replaced
// and N1 is the value that gets conditionally zeroed. If the first operand
// does not qualify, try the other one; if neither does, give up.
if (!IsEqualCompZero(N0) || !N0.hasOneUse())
16142+
std::swap(N0, N1);
16143+
if (!IsEqualCompZero(N0) || !N0.hasOneUse())
16144+
return SDValue();
16145+
16146+
// N1 must be known to be 0 or 1. Only then is `N1 & (c cmp 0)` the same as
// "N1 or zero" (assuming the setcc itself produces 0/1 -- TODO confirm the
// target's boolean contents).
KnownBits Known = DAG.computeKnownBits(N1);
16147+
if (Known.getMaxValue().ugt(1))
16148+
return SDValue();
16149+
16150+
// A `c != 0` compare keeps N1 when c is non-zero, i.e. zeroes it when c == 0,
// which is CZERO_EQZ; the `c == 0` compare is the mirror case, CZERO_NEZ.
unsigned CzeroOpcode =
16151+
(cast<CondCodeSDNode>(N0.getOperand(2))->get() == ISD::SETNE)
16152+
? RISCVISD::CZERO_EQZ
16153+
: RISCVISD::CZERO_NEZ;
16154+
16155+
EVT VT = N->getValueType(0);
16156+
SDLoc DL(N);
16157+
// Operands: the value to pass through (N1) and the compared value c, taken
// directly from the setcc so the compare result is no longer needed.
return DAG.getNode(CzeroOpcode, DL, VT, N1, N0.getOperand(0));
16158+
}
16159+
1612016160
static SDValue reduceANDOfAtomicLoad(SDNode *N,
1612116161
TargetLowering::DAGCombinerInfo &DCI) {
1612216162
SelectionDAG &DAG = DCI.DAG;
@@ -16180,7 +16220,9 @@ static SDValue performANDCombine(SDNode *N,
1618016220

1618116221
if (SDValue V = reverseZExtICmpCombine(N, DAG, Subtarget))
1618216222
return V;
16183-
16223+
if (DCI.isAfterLegalizeDAG())
16224+
if (SDValue V = combineANDOfSETCCToCZERO(N, DAG, Subtarget))
16225+
return V;
1618416226
if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
1618516227
return V;
1618616228
if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))

llvm/test/CodeGen/RISCV/xaluo.ll

Lines changed: 19 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1834,21 +1834,20 @@ define zeroext i1 @umulo.i64(i64 %v1, i64 %v2, ptr %res) {
18341834
; RV32ZICOND-NEXT: mul a5, a3, a0
18351835
; RV32ZICOND-NEXT: mul a6, a1, a2
18361836
; RV32ZICOND-NEXT: mulhu a7, a0, a2
1837-
; RV32ZICOND-NEXT: snez t0, a3
1837+
; RV32ZICOND-NEXT: add a5, a6, a5
1838+
; RV32ZICOND-NEXT: snez a6, a3
18381839
; RV32ZICOND-NEXT: mulhu a3, a3, a0
1839-
; RV32ZICOND-NEXT: mul t1, a0, a2
1840+
; RV32ZICOND-NEXT: mul t0, a0, a2
18401841
; RV32ZICOND-NEXT: mulhu a0, a1, a2
1841-
; RV32ZICOND-NEXT: snez a1, a1
1842-
; RV32ZICOND-NEXT: add a5, a6, a5
1843-
; RV32ZICOND-NEXT: and a1, a1, t0
1842+
; RV32ZICOND-NEXT: czero.eqz a1, a6, a1
18441843
; RV32ZICOND-NEXT: snez a0, a0
18451844
; RV32ZICOND-NEXT: snez a2, a3
18461845
; RV32ZICOND-NEXT: add a5, a7, a5
18471846
; RV32ZICOND-NEXT: or a0, a1, a0
18481847
; RV32ZICOND-NEXT: sltu a1, a5, a7
18491848
; RV32ZICOND-NEXT: or a0, a0, a2
18501849
; RV32ZICOND-NEXT: or a0, a0, a1
1851-
; RV32ZICOND-NEXT: sw t1, 0(a4)
1850+
; RV32ZICOND-NEXT: sw t0, 0(a4)
18521851
; RV32ZICOND-NEXT: sw a5, 4(a4)
18531852
; RV32ZICOND-NEXT: ret
18541853
;
@@ -3690,11 +3689,10 @@ define i64 @umulo.select.i64(i64 %v1, i64 %v2) {
36903689
; RV32ZICOND-NEXT: mul a5, a1, a2
36913690
; RV32ZICOND-NEXT: snez a6, a3
36923691
; RV32ZICOND-NEXT: add a4, a5, a4
3693-
; RV32ZICOND-NEXT: snez a5, a1
3694-
; RV32ZICOND-NEXT: and a5, a5, a6
3695-
; RV32ZICOND-NEXT: mulhu a6, a1, a2
3696-
; RV32ZICOND-NEXT: snez a6, a6
3697-
; RV32ZICOND-NEXT: or a5, a5, a6
3692+
; RV32ZICOND-NEXT: mulhu a5, a1, a2
3693+
; RV32ZICOND-NEXT: czero.eqz a6, a6, a1
3694+
; RV32ZICOND-NEXT: snez a5, a5
3695+
; RV32ZICOND-NEXT: or a5, a6, a5
36983696
; RV32ZICOND-NEXT: mulhu a6, a0, a2
36993697
; RV32ZICOND-NEXT: add a4, a6, a4
37003698
; RV32ZICOND-NEXT: sltu a4, a4, a6
@@ -3783,18 +3781,17 @@ define i1 @umulo.not.i64(i64 %v1, i64 %v2) {
37833781
; RV32ZICOND: # %bb.0: # %entry
37843782
; RV32ZICOND-NEXT: mul a4, a3, a0
37853783
; RV32ZICOND-NEXT: mul a5, a1, a2
3786-
; RV32ZICOND-NEXT: mulhu a6, a0, a2
3784+
; RV32ZICOND-NEXT: add a4, a5, a4
3785+
; RV32ZICOND-NEXT: mulhu a5, a0, a2
37873786
; RV32ZICOND-NEXT: mulhu a0, a3, a0
37883787
; RV32ZICOND-NEXT: snez a3, a3
37893788
; RV32ZICOND-NEXT: mulhu a2, a1, a2
3790-
; RV32ZICOND-NEXT: snez a1, a1
3791-
; RV32ZICOND-NEXT: add a4, a5, a4
3792-
; RV32ZICOND-NEXT: and a1, a1, a3
3789+
; RV32ZICOND-NEXT: czero.eqz a1, a3, a1
37933790
; RV32ZICOND-NEXT: snez a2, a2
37943791
; RV32ZICOND-NEXT: snez a0, a0
3795-
; RV32ZICOND-NEXT: add a4, a6, a4
3792+
; RV32ZICOND-NEXT: add a4, a5, a4
37963793
; RV32ZICOND-NEXT: or a1, a1, a2
3797-
; RV32ZICOND-NEXT: sltu a2, a4, a6
3794+
; RV32ZICOND-NEXT: sltu a2, a4, a5
37983795
; RV32ZICOND-NEXT: or a0, a1, a0
37993796
; RV32ZICOND-NEXT: or a0, a0, a2
38003797
; RV32ZICOND-NEXT: xori a0, a0, 1
@@ -5156,18 +5153,17 @@ define zeroext i1 @umulo.br.i64(i64 %v1, i64 %v2) {
51565153
; RV32ZICOND: # %bb.0: # %entry
51575154
; RV32ZICOND-NEXT: mul a4, a3, a0
51585155
; RV32ZICOND-NEXT: mul a5, a1, a2
5159-
; RV32ZICOND-NEXT: mulhu a6, a0, a2
5156+
; RV32ZICOND-NEXT: add a4, a5, a4
5157+
; RV32ZICOND-NEXT: mulhu a5, a0, a2
51605158
; RV32ZICOND-NEXT: mulhu a0, a3, a0
51615159
; RV32ZICOND-NEXT: snez a3, a3
51625160
; RV32ZICOND-NEXT: mulhu a2, a1, a2
5163-
; RV32ZICOND-NEXT: snez a1, a1
5164-
; RV32ZICOND-NEXT: add a4, a5, a4
5165-
; RV32ZICOND-NEXT: and a1, a1, a3
5161+
; RV32ZICOND-NEXT: czero.eqz a1, a3, a1
51665162
; RV32ZICOND-NEXT: snez a2, a2
51675163
; RV32ZICOND-NEXT: snez a0, a0
5168-
; RV32ZICOND-NEXT: add a4, a6, a4
5164+
; RV32ZICOND-NEXT: add a4, a5, a4
51695165
; RV32ZICOND-NEXT: or a1, a1, a2
5170-
; RV32ZICOND-NEXT: sltu a2, a4, a6
5166+
; RV32ZICOND-NEXT: sltu a2, a4, a5
51715167
; RV32ZICOND-NEXT: or a0, a1, a0
51725168
; RV32ZICOND-NEXT: or a0, a0, a2
51735169
; RV32ZICOND-NEXT: beqz a0, .LBB64_2

llvm/test/CodeGen/RISCV/zicond-opts.ll

Lines changed: 118 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -7,22 +7,132 @@ define i32 @icmp_and(i64 %x, i64 %y) {
77
; RV32ZICOND-LABEL: icmp_and:
88
; RV32ZICOND: # %bb.0:
99
; RV32ZICOND-NEXT: or a2, a2, a3
10+
; RV32ZICOND-NEXT: snez a2, a2
11+
; RV32ZICOND-NEXT: or a0, a0, a1
12+
; RV32ZICOND-NEXT: czero.eqz a0, a2, a0
13+
; RV32ZICOND-NEXT: ret
14+
;
15+
; RV64ZICOND-LABEL: icmp_and:
16+
; RV64ZICOND: # %bb.0:
17+
; RV64ZICOND-NEXT: snez a1, a1
18+
; RV64ZICOND-NEXT: czero.eqz a0, a1, a0
19+
; RV64ZICOND-NEXT: ret
20+
%3 = icmp ne i64 %y, 0
21+
%4 = icmp ne i64 %x, 0
22+
%5 = and i1 %4, %3
23+
%6 = zext i1 %5 to i32
24+
ret i32 %6
25+
}
26+
27+
; Make sure we choose to replace the single-use icmp
28+
define i32 @icmp_and_x_multiple_uses(i64 %x, i64 %y) {
29+
; RV32ZICOND-LABEL: icmp_and_x_multiple_uses:
30+
; RV32ZICOND: # %bb.0:
31+
; RV32ZICOND-NEXT: or a2, a2, a3
32+
; RV32ZICOND-NEXT: or a0, a0, a1
33+
; RV32ZICOND-NEXT: snez a0, a0
34+
; RV32ZICOND-NEXT: czero.eqz a1, a0, a2
35+
; RV32ZICOND-NEXT: add a0, a1, a0
36+
; RV32ZICOND-NEXT: ret
37+
;
38+
; RV64ZICOND-LABEL: icmp_and_x_multiple_uses:
39+
; RV64ZICOND: # %bb.0:
40+
; RV64ZICOND-NEXT: snez a0, a0
41+
; RV64ZICOND-NEXT: czero.eqz a1, a0, a1
42+
; RV64ZICOND-NEXT: add a0, a1, a0
43+
; RV64ZICOND-NEXT: ret
44+
%3 = icmp ne i64 %y, 0
45+
%4 = icmp ne i64 %x, 0
46+
%5 = and i1 %4, %3
47+
%6 = zext i1 %5 to i32
48+
%7 = zext i1 %4 to i32
49+
%8 = add i32 %6, %7
50+
ret i32 %8
51+
}
52+
53+
; Make sure we choose to replace the single-use icmp
54+
define i32 @icmp_and_y_multiple_uses(i64 %x, i64 %y) {
55+
; RV32ZICOND-LABEL: icmp_and_y_multiple_uses:
56+
; RV32ZICOND: # %bb.0:
57+
; RV32ZICOND-NEXT: or a2, a2, a3
58+
; RV32ZICOND-NEXT: snez a2, a2
59+
; RV32ZICOND-NEXT: or a0, a0, a1
60+
; RV32ZICOND-NEXT: czero.eqz a0, a2, a0
61+
; RV32ZICOND-NEXT: add a0, a0, a2
62+
; RV32ZICOND-NEXT: ret
63+
;
64+
; RV64ZICOND-LABEL: icmp_and_y_multiple_uses:
65+
; RV64ZICOND: # %bb.0:
66+
; RV64ZICOND-NEXT: snez a1, a1
67+
; RV64ZICOND-NEXT: czero.eqz a0, a1, a0
68+
; RV64ZICOND-NEXT: add a0, a0, a1
69+
; RV64ZICOND-NEXT: ret
70+
%3 = icmp ne i64 %y, 0
71+
%4 = icmp ne i64 %x, 0
72+
%5 = and i1 %4, %3
73+
%6 = zext i1 %5 to i32
74+
%7 = zext i1 %3 to i32
75+
%8 = add i32 %6, %7
76+
ret i32 %8
77+
}
78+
79+
; Both icmp's have multiple uses, don't optimize
80+
define i32 @icmp_and_xy_multiple_uses(i64 %x, i64 %y) {
81+
; RV32ZICOND-LABEL: icmp_and_xy_multiple_uses:
82+
; RV32ZICOND: # %bb.0:
83+
; RV32ZICOND-NEXT: or a2, a2, a3
1084
; RV32ZICOND-NEXT: or a0, a0, a1
1185
; RV32ZICOND-NEXT: snez a1, a2
1286
; RV32ZICOND-NEXT: snez a0, a0
13-
; RV32ZICOND-NEXT: and a0, a0, a1
87+
; RV32ZICOND-NEXT: and a2, a0, a1
88+
; RV32ZICOND-NEXT: add a0, a1, a0
89+
; RV32ZICOND-NEXT: add a0, a2, a0
1490
; RV32ZICOND-NEXT: ret
1591
;
16-
; RV64ZICOND-LABEL: icmp_and:
92+
; RV64ZICOND-LABEL: icmp_and_xy_multiple_uses:
1793
; RV64ZICOND: # %bb.0:
1894
; RV64ZICOND-NEXT: snez a1, a1
1995
; RV64ZICOND-NEXT: snez a0, a0
20-
; RV64ZICOND-NEXT: and a0, a0, a1
96+
; RV64ZICOND-NEXT: and a2, a0, a1
97+
; RV64ZICOND-NEXT: add a0, a1, a0
98+
; RV64ZICOND-NEXT: add a0, a2, a0
2199
; RV64ZICOND-NEXT: ret
22100
%3 = icmp ne i64 %y, 0
23101
%4 = icmp ne i64 %x, 0
24102
%5 = and i1 %4, %3
25103
%6 = zext i1 %5 to i32
104+
%7 = zext i1 %3 to i32
105+
%8 = zext i1 %4 to i32
106+
%9 = add i32 %6, %7
107+
%10 = add i32 %9, %8
108+
ret i32 %10
109+
}
110+
111+
112+
; (and (icmp x, 0, ne), (icmp y, 0, sgt)) -> (czero.eqz (icmp y, 0, sgt), x)
113+
define i32 @icmp_and_select(i64 %x, i64 %y, i32 %z) {
114+
; RV32ZICOND-LABEL: icmp_and_select:
115+
; RV32ZICOND: # %bb.0:
116+
; RV32ZICOND-NEXT: sgtz a5, a3
117+
; RV32ZICOND-NEXT: snez a2, a2
118+
; RV32ZICOND-NEXT: czero.eqz a5, a5, a3
119+
; RV32ZICOND-NEXT: czero.nez a2, a2, a3
120+
; RV32ZICOND-NEXT: or a2, a2, a5
121+
; RV32ZICOND-NEXT: or a0, a0, a1
122+
; RV32ZICOND-NEXT: czero.eqz a0, a2, a0
123+
; RV32ZICOND-NEXT: czero.eqz a0, a4, a0
124+
; RV32ZICOND-NEXT: ret
125+
;
126+
; RV64ZICOND-LABEL: icmp_and_select:
127+
; RV64ZICOND: # %bb.0:
128+
; RV64ZICOND-NEXT: sgtz a1, a1
129+
; RV64ZICOND-NEXT: czero.eqz a0, a1, a0
130+
; RV64ZICOND-NEXT: czero.eqz a0, a2, a0
131+
; RV64ZICOND-NEXT: ret
132+
%3 = icmp sgt i64 %y, 0
133+
%4 = icmp ne i64 %x, 0
134+
%5 = and i1 %4, %3
135+
%6 = select i1 %5, i32 %z, i32 0
26136
ret i32 %6
27137
}
28138

@@ -32,21 +142,17 @@ define i32 @icmp_and_and(i64 %x, i64 %y, i64 %z) {
32142
; RV32ZICOND: # %bb.0:
33143
; RV32ZICOND-NEXT: or a2, a2, a3
34144
; RV32ZICOND-NEXT: or a0, a0, a1
35-
; RV32ZICOND-NEXT: or a4, a4, a5
36-
; RV32ZICOND-NEXT: snez a1, a2
37145
; RV32ZICOND-NEXT: snez a0, a0
38-
; RV32ZICOND-NEXT: and a0, a1, a0
39-
; RV32ZICOND-NEXT: snez a1, a4
40-
; RV32ZICOND-NEXT: and a0, a1, a0
146+
; RV32ZICOND-NEXT: czero.eqz a0, a0, a2
147+
; RV32ZICOND-NEXT: or a4, a4, a5
148+
; RV32ZICOND-NEXT: czero.eqz a0, a0, a4
41149
; RV32ZICOND-NEXT: ret
42150
;
43151
; RV64ZICOND-LABEL: icmp_and_and:
44152
; RV64ZICOND: # %bb.0:
45-
; RV64ZICOND-NEXT: snez a1, a1
46153
; RV64ZICOND-NEXT: snez a0, a0
47-
; RV64ZICOND-NEXT: and a0, a1, a0
48-
; RV64ZICOND-NEXT: snez a1, a2
49-
; RV64ZICOND-NEXT: and a0, a1, a0
154+
; RV64ZICOND-NEXT: czero.eqz a0, a0, a1
155+
; RV64ZICOND-NEXT: czero.eqz a0, a0, a2
50156
; RV64ZICOND-NEXT: ret
51157
%4 = icmp ne i64 %y, 0
52158
%5 = icmp ne i64 %x, 0

0 commit comments

Comments
 (0)