Merged
44 changes: 43 additions & 1 deletion llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -16117,6 +16117,46 @@ static SDValue reverseZExtICmpCombine(SDNode *N, SelectionDAG &DAG,
  return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res);
}

// (and (i1) f, (setcc c, 0, ne)) -> (czero.eqz f, c)
// (and (i1) f, (setcc c, 0, eq)) -> (czero.nez f, c)
// (and (setcc c, 0, ne), (i1) g) -> (czero.eqz g, c)
// (and (setcc c, 0, eq), (i1) g) -> (czero.nez g, c)
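// For example (a sketch, assuming Zicond and that f is already a 0/1 value):
// (and f, (setcc x, 0, ne)) selects to `czero.eqz f, x`, which yields f when
// x is nonzero and 0 when x is zero, i.e. exactly f & (x != 0) without a
// separate snez.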
static SDValue combineANDOfSETCCToCZERO(SDNode *N, SelectionDAG &DAG,
                                        const RISCVSubtarget &Subtarget) {
  if (!Subtarget.hasCZEROLike())
    return SDValue();

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  auto IsEqualCompZero = [](SDValue &V) -> bool {
    if (V.getOpcode() == ISD::SETCC && isNullConstant(V.getOperand(1))) {
      ISD::CondCode CC = cast<CondCodeSDNode>(V.getOperand(2))->get();
      if (ISD::isIntEqualitySetCC(CC))
        return true;
    }
    return false;
  };

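  // Canonicalize so that N0 is the single-use equality compare against zero,
  // if either operand is one; otherwise there is nothing to fold.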
  if (!IsEqualCompZero(N0) || !N0.hasOneUse())
    std::swap(N0, N1);
  if (!IsEqualCompZero(N0) || !N0.hasOneUse())
    return SDValue();

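  // The other operand must be known to be 0 or 1 (an i1 value, possibly
  // zero-extended); only then is the AND equivalent to conditionally zeroing
  // that operand.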
  KnownBits Known = DAG.computeKnownBits(N1);
  if (Known.getMaxValue().ugt(1))
    return SDValue();

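  // A setcc-ne-zero keeps N1 exactly when the condition is nonzero, which is
  // CZERO_EQZ (zero the result when the condition equals zero); setcc-eq-zero
  // correspondingly maps to CZERO_NEZ.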
  unsigned CzeroOpcode =
      (cast<CondCodeSDNode>(N0.getOperand(2))->get() == ISD::SETNE)
          ? RISCVISD::CZERO_EQZ
          : RISCVISD::CZERO_NEZ;

  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  return DAG.getNode(CzeroOpcode, DL, VT, N1, N0.getOperand(0));
}

static SDValue reduceANDOfAtomicLoad(SDNode *N,
                                     TargetLowering::DAGCombinerInfo &DCI) {
  SelectionDAG &DAG = DCI.DAG;
@@ -16180,7 +16220,9 @@ static SDValue performANDCombine(SDNode *N,

  if (SDValue V = reverseZExtICmpCombine(N, DAG, Subtarget))
    return V;

  if (DCI.isAfterLegalizeDAG())
    if (SDValue V = combineANDOfSETCCToCZERO(N, DAG, Subtarget))
      return V;
  if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
    return V;
  if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
42 changes: 19 additions & 23 deletions llvm/test/CodeGen/RISCV/xaluo.ll
@@ -1834,21 +1834,20 @@ define zeroext i1 @umulo.i64(i64 %v1, i64 %v2, ptr %res) {
; RV32ZICOND-NEXT: mul a5, a3, a0
; RV32ZICOND-NEXT: mul a6, a1, a2
; RV32ZICOND-NEXT: mulhu a7, a0, a2
; RV32ZICOND-NEXT: snez t0, a3
; RV32ZICOND-NEXT: add a5, a6, a5
; RV32ZICOND-NEXT: snez a6, a3
; RV32ZICOND-NEXT: mulhu a3, a3, a0
; RV32ZICOND-NEXT: mul t1, a0, a2
; RV32ZICOND-NEXT: mul t0, a0, a2
; RV32ZICOND-NEXT: mulhu a0, a1, a2
; RV32ZICOND-NEXT: snez a1, a1
; RV32ZICOND-NEXT: add a5, a6, a5
; RV32ZICOND-NEXT: and a1, a1, t0
; RV32ZICOND-NEXT: czero.eqz a1, a6, a1
; RV32ZICOND-NEXT: snez a0, a0
; RV32ZICOND-NEXT: snez a2, a3
; RV32ZICOND-NEXT: add a5, a7, a5
; RV32ZICOND-NEXT: or a0, a1, a0
; RV32ZICOND-NEXT: sltu a1, a5, a7
; RV32ZICOND-NEXT: or a0, a0, a2
; RV32ZICOND-NEXT: or a0, a0, a1
; RV32ZICOND-NEXT: sw t1, 0(a4)
; RV32ZICOND-NEXT: sw t0, 0(a4)
; RV32ZICOND-NEXT: sw a5, 4(a4)
; RV32ZICOND-NEXT: ret
;
@@ -3690,11 +3689,10 @@ define i64 @umulo.select.i64(i64 %v1, i64 %v2) {
; RV32ZICOND-NEXT: mul a5, a1, a2
; RV32ZICOND-NEXT: snez a6, a3
; RV32ZICOND-NEXT: add a4, a5, a4
; RV32ZICOND-NEXT: snez a5, a1
; RV32ZICOND-NEXT: and a5, a5, a6
; RV32ZICOND-NEXT: mulhu a6, a1, a2
; RV32ZICOND-NEXT: snez a6, a6
; RV32ZICOND-NEXT: or a5, a5, a6
; RV32ZICOND-NEXT: mulhu a5, a1, a2
; RV32ZICOND-NEXT: czero.eqz a6, a6, a1
; RV32ZICOND-NEXT: snez a5, a5
; RV32ZICOND-NEXT: or a5, a6, a5
; RV32ZICOND-NEXT: mulhu a6, a0, a2
; RV32ZICOND-NEXT: add a4, a6, a4
; RV32ZICOND-NEXT: sltu a4, a4, a6
@@ -3783,18 +3781,17 @@ define i1 @umulo.not.i64(i64 %v1, i64 %v2) {
; RV32ZICOND: # %bb.0: # %entry
; RV32ZICOND-NEXT: mul a4, a3, a0
; RV32ZICOND-NEXT: mul a5, a1, a2
; RV32ZICOND-NEXT: mulhu a6, a0, a2
; RV32ZICOND-NEXT: add a4, a5, a4
; RV32ZICOND-NEXT: mulhu a5, a0, a2
; RV32ZICOND-NEXT: mulhu a0, a3, a0
; RV32ZICOND-NEXT: snez a3, a3
; RV32ZICOND-NEXT: mulhu a2, a1, a2
; RV32ZICOND-NEXT: snez a1, a1
; RV32ZICOND-NEXT: add a4, a5, a4
; RV32ZICOND-NEXT: and a1, a1, a3
; RV32ZICOND-NEXT: czero.eqz a1, a3, a1
; RV32ZICOND-NEXT: snez a2, a2
; RV32ZICOND-NEXT: snez a0, a0
; RV32ZICOND-NEXT: add a4, a6, a4
; RV32ZICOND-NEXT: add a4, a5, a4
; RV32ZICOND-NEXT: or a1, a1, a2
; RV32ZICOND-NEXT: sltu a2, a4, a6
; RV32ZICOND-NEXT: sltu a2, a4, a5
; RV32ZICOND-NEXT: or a0, a1, a0
; RV32ZICOND-NEXT: or a0, a0, a2
; RV32ZICOND-NEXT: xori a0, a0, 1
@@ -5156,18 +5153,17 @@ define zeroext i1 @umulo.br.i64(i64 %v1, i64 %v2) {
; RV32ZICOND: # %bb.0: # %entry
; RV32ZICOND-NEXT: mul a4, a3, a0
; RV32ZICOND-NEXT: mul a5, a1, a2
; RV32ZICOND-NEXT: mulhu a6, a0, a2
; RV32ZICOND-NEXT: add a4, a5, a4
; RV32ZICOND-NEXT: mulhu a5, a0, a2
; RV32ZICOND-NEXT: mulhu a0, a3, a0
; RV32ZICOND-NEXT: snez a3, a3
; RV32ZICOND-NEXT: mulhu a2, a1, a2
; RV32ZICOND-NEXT: snez a1, a1
; RV32ZICOND-NEXT: add a4, a5, a4
; RV32ZICOND-NEXT: and a1, a1, a3
; RV32ZICOND-NEXT: czero.eqz a1, a3, a1
; RV32ZICOND-NEXT: snez a2, a2
; RV32ZICOND-NEXT: snez a0, a0
; RV32ZICOND-NEXT: add a4, a6, a4
; RV32ZICOND-NEXT: add a4, a5, a4
; RV32ZICOND-NEXT: or a1, a1, a2
; RV32ZICOND-NEXT: sltu a2, a4, a6
; RV32ZICOND-NEXT: sltu a2, a4, a5
; RV32ZICOND-NEXT: or a0, a1, a0
; RV32ZICOND-NEXT: or a0, a0, a2
; RV32ZICOND-NEXT: beqz a0, .LBB64_2
130 changes: 118 additions & 12 deletions llvm/test/CodeGen/RISCV/zicond-opts.ll
@@ -7,22 +7,132 @@ define i32 @icmp_and(i64 %x, i64 %y) {
; RV32ZICOND-LABEL: icmp_and:
; RV32ZICOND: # %bb.0:
; RV32ZICOND-NEXT: or a2, a2, a3
; RV32ZICOND-NEXT: snez a2, a2
; RV32ZICOND-NEXT: or a0, a0, a1
; RV32ZICOND-NEXT: czero.eqz a0, a2, a0
; RV32ZICOND-NEXT: ret
;
; RV64ZICOND-LABEL: icmp_and:
; RV64ZICOND: # %bb.0:
; RV64ZICOND-NEXT: snez a1, a1
; RV64ZICOND-NEXT: czero.eqz a0, a1, a0
; RV64ZICOND-NEXT: ret
%3 = icmp ne i64 %y, 0
%4 = icmp ne i64 %x, 0
%5 = and i1 %4, %3
%6 = zext i1 %5 to i32
ret i32 %6
}

; Make sure we choose to replace the single-use icmp
define i32 @icmp_and_x_multiple_uses(i64 %x, i64 %y) {
; RV32ZICOND-LABEL: icmp_and_x_multiple_uses:
; RV32ZICOND: # %bb.0:
; RV32ZICOND-NEXT: or a2, a2, a3
; RV32ZICOND-NEXT: or a0, a0, a1
; RV32ZICOND-NEXT: snez a0, a0
; RV32ZICOND-NEXT: czero.eqz a1, a0, a2
; RV32ZICOND-NEXT: add a0, a1, a0
; RV32ZICOND-NEXT: ret
;
; RV64ZICOND-LABEL: icmp_and_x_multiple_uses:
; RV64ZICOND: # %bb.0:
; RV64ZICOND-NEXT: snez a0, a0
; RV64ZICOND-NEXT: czero.eqz a1, a0, a1
; RV64ZICOND-NEXT: add a0, a1, a0
; RV64ZICOND-NEXT: ret
%3 = icmp ne i64 %y, 0
%4 = icmp ne i64 %x, 0
%5 = and i1 %4, %3
%6 = zext i1 %5 to i32
%7 = zext i1 %4 to i32
%8 = add i32 %6, %7
ret i32 %8
}

; Make sure we choose to replace the single-use icmp
define i32 @icmp_and_y_multiple_uses(i64 %x, i64 %y) {
; RV32ZICOND-LABEL: icmp_and_y_multiple_uses:
; RV32ZICOND: # %bb.0:
; RV32ZICOND-NEXT: or a2, a2, a3
; RV32ZICOND-NEXT: snez a2, a2
; RV32ZICOND-NEXT: or a0, a0, a1
; RV32ZICOND-NEXT: czero.eqz a0, a2, a0
; RV32ZICOND-NEXT: add a0, a0, a2
; RV32ZICOND-NEXT: ret
;
; RV64ZICOND-LABEL: icmp_and_y_multiple_uses:
; RV64ZICOND: # %bb.0:
; RV64ZICOND-NEXT: snez a1, a1
; RV64ZICOND-NEXT: czero.eqz a0, a1, a0
; RV64ZICOND-NEXT: add a0, a0, a1
; RV64ZICOND-NEXT: ret
%3 = icmp ne i64 %y, 0
%4 = icmp ne i64 %x, 0
%5 = and i1 %4, %3
%6 = zext i1 %5 to i32
%7 = zext i1 %3 to i32
%8 = add i32 %6, %7
ret i32 %8
}

; Both icmps have multiple uses, so don't optimize
define i32 @icmp_and_xy_multiple_uses(i64 %x, i64 %y) {
; RV32ZICOND-LABEL: icmp_and_xy_multiple_uses:
; RV32ZICOND: # %bb.0:
; RV32ZICOND-NEXT: or a2, a2, a3
; RV32ZICOND-NEXT: or a0, a0, a1
; RV32ZICOND-NEXT: snez a1, a2
; RV32ZICOND-NEXT: snez a0, a0
; RV32ZICOND-NEXT: and a0, a0, a1
; RV32ZICOND-NEXT: and a2, a0, a1
; RV32ZICOND-NEXT: add a0, a1, a0
; RV32ZICOND-NEXT: add a0, a2, a0
; RV32ZICOND-NEXT: ret
;
; RV64ZICOND-LABEL: icmp_and:
; RV64ZICOND-LABEL: icmp_and_xy_multiple_uses:
; RV64ZICOND: # %bb.0:
; RV64ZICOND-NEXT: snez a1, a1
; RV64ZICOND-NEXT: snez a0, a0
; RV64ZICOND-NEXT: and a0, a0, a1
; RV64ZICOND-NEXT: and a2, a0, a1
; RV64ZICOND-NEXT: add a0, a1, a0
; RV64ZICOND-NEXT: add a0, a2, a0
; RV64ZICOND-NEXT: ret
%3 = icmp ne i64 %y, 0
%4 = icmp ne i64 %x, 0
%5 = and i1 %4, %3
%6 = zext i1 %5 to i32
%7 = zext i1 %3 to i32
%8 = zext i1 %4 to i32
%9 = add i32 %6, %7
%10 = add i32 %9, %8
ret i32 %10
}


; (and (icmp x, 0, ne), (icmp y, 0, sgt)) -> (czero.eqz (icmp y, 0, sgt), x)
define i32 @icmp_and_select(i64 %x, i64 %y, i32 %z) {
; RV32ZICOND-LABEL: icmp_and_select:
; RV32ZICOND: # %bb.0:
; RV32ZICOND-NEXT: sgtz a5, a3
; RV32ZICOND-NEXT: snez a2, a2
; RV32ZICOND-NEXT: czero.eqz a5, a5, a3
; RV32ZICOND-NEXT: czero.nez a2, a2, a3
; RV32ZICOND-NEXT: or a2, a2, a5
; RV32ZICOND-NEXT: or a0, a0, a1
; RV32ZICOND-NEXT: czero.eqz a0, a2, a0
; RV32ZICOND-NEXT: czero.eqz a0, a4, a0
; RV32ZICOND-NEXT: ret
;
; RV64ZICOND-LABEL: icmp_and_select:
; RV64ZICOND: # %bb.0:
; RV64ZICOND-NEXT: sgtz a1, a1
; RV64ZICOND-NEXT: czero.eqz a0, a1, a0
; RV64ZICOND-NEXT: czero.eqz a0, a2, a0
; RV64ZICOND-NEXT: ret
%3 = icmp sgt i64 %y, 0
%4 = icmp ne i64 %x, 0
%5 = and i1 %4, %3
%6 = select i1 %5, i32 %z, i32 0
ret i32 %6
}

@@ -32,21 +142,17 @@ define i32 @icmp_and_and(i64 %x, i64 %y, i64 %z) {
; RV32ZICOND: # %bb.0:
; RV32ZICOND-NEXT: or a2, a2, a3
; RV32ZICOND-NEXT: or a0, a0, a1
; RV32ZICOND-NEXT: or a4, a4, a5
; RV32ZICOND-NEXT: snez a1, a2
; RV32ZICOND-NEXT: snez a0, a0
; RV32ZICOND-NEXT: and a0, a1, a0
; RV32ZICOND-NEXT: snez a1, a4
; RV32ZICOND-NEXT: and a0, a1, a0
; RV32ZICOND-NEXT: czero.eqz a0, a0, a2
; RV32ZICOND-NEXT: or a4, a4, a5
; RV32ZICOND-NEXT: czero.eqz a0, a0, a4
; RV32ZICOND-NEXT: ret
;
; RV64ZICOND-LABEL: icmp_and_and:
; RV64ZICOND: # %bb.0:
; RV64ZICOND-NEXT: snez a1, a1
; RV64ZICOND-NEXT: snez a0, a0
; RV64ZICOND-NEXT: and a0, a1, a0
; RV64ZICOND-NEXT: snez a1, a2
; RV64ZICOND-NEXT: and a0, a1, a0
; RV64ZICOND-NEXT: czero.eqz a0, a0, a1
; RV64ZICOND-NEXT: czero.eqz a0, a0, a2
; RV64ZICOND-NEXT: ret
%4 = icmp ne i64 %y, 0
%5 = icmp ne i64 %x, 0