-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[AMDGPU] Add nocreateundeforpoison annotations
#166450
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
This commit goes through IntrinsicsAMDGPU.td and adds `nocreateundeforpoison` to intrinsics that (to my knowledge) perform arithmetic operations that are defined everywhere (so no bitfield extracts and such since those can have invalid inputs, and similarly for permutations). To achieve this, the byte selector arguments on some of the small-float conversion intrinsics have been given range() annotations so that values not in [0, 3] are a verifier error as the documentation says they should be.
|
@llvm/pr-subscribers-llvm-ir @llvm/pr-subscribers-backend-amdgpu Author: Krzysztof Drewniak (krzysz00) Changes: This commit goes through IntrinsicsAMDGPU.td and adds `nocreateundeforpoison` to intrinsics that (to my knowledge) perform arithmetic operations that are defined everywhere (so no bitfield extracts and such since those can have invalid inputs, and similarly for permutations). To achieve this, the byte selector arguments on some of the small-float conversion intrinsics have been given range() annotations so that values not in [0, 3] are a verifier error as the documentation says they should be. Patch is 71.87 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/166450.diff 13 Files Affected:
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 8e35109061792..55a1272d58cb9 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -398,32 +398,31 @@ def int_amdgcn_div_scale : DefaultAttrsIntrinsic<
// (0 = Denominator, 1 = Numerator).
[llvm_anyfloat_ty, llvm_i1_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty],
- [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<2>>]
+ [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison, ImmArg<ArgIndex<2>>]
>;
+// Floating-point arithmetic intrinsics (and integer conversions) not handled
+// elsewhere.
+let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison] in {
+
def int_amdgcn_div_fmas : DefaultAttrsIntrinsic<[llvm_anyfloat_ty],
- [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty],
- [IntrNoMem, IntrSpeculatable]
+ [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty]
>;
def int_amdgcn_div_fixup : DefaultAttrsIntrinsic<[llvm_anyfloat_ty],
- [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
- [IntrNoMem, IntrSpeculatable]
->;
+ [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>;
// Look Up 2.0 / pi src0 with segment select src1[4:0]
def int_amdgcn_trig_preop : DefaultAttrsIntrinsic<
- [llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty],
- [IntrNoMem, IntrSpeculatable]
+ [llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty]
>;
def int_amdgcn_sin : DefaultAttrsIntrinsic<
- [llvm_anyfloat_ty], [LLVMMatchType<0>],
- [IntrNoMem, IntrSpeculatable]
+ [llvm_anyfloat_ty], [LLVMMatchType<0>]
>;
def int_amdgcn_cos : DefaultAttrsIntrinsic<
- [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
+ [llvm_anyfloat_ty], [LLVMMatchType<0>]
>;
// v_log_{f16|f32}, performs log2. f32 version does not handle
@@ -431,7 +430,7 @@ def int_amdgcn_cos : DefaultAttrsIntrinsic<
// support denormals, and the generic log2 intrinsic should be
// preferred.
def int_amdgcn_log : DefaultAttrsIntrinsic<
- [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
+ [llvm_anyfloat_ty], [LLVMMatchType<0>]
>;
// v_exp_{f16|f32} (int_amdgcn_exp was taken by export
@@ -440,145 +439,162 @@ def int_amdgcn_log : DefaultAttrsIntrinsic<
// support denormals, and the generic exp2 intrinsic should be
// preferred.
def int_amdgcn_exp2 : DefaultAttrsIntrinsic<
- [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
+ [llvm_anyfloat_ty], [LLVMMatchType<0>]
>;
def int_amdgcn_log_clamp : DefaultAttrsIntrinsic<
- [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
->;
-
-def int_amdgcn_fmul_legacy : ClangBuiltin<"__builtin_amdgcn_fmul_legacy">,
- DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, IntrSpeculatable, Commutative]
->;
-
-// Fused single-precision multiply-add with legacy behaviour for the multiply,
-// which is that +/- 0.0 * anything (even NaN or infinity) is +0.0. This is
-// intended for use on subtargets that have the v_fma_legacy_f32 and/or
-// v_fmac_legacy_f32 instructions. (Note that v_fma_legacy_f16 is unrelated and
-// has a completely different kind of legacy behaviour.)
-def int_amdgcn_fma_legacy :
- DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
- [IntrNoMem, IntrSpeculatable, Commutative]
+ [llvm_anyfloat_ty], [LLVMMatchType<0>]
>;
def int_amdgcn_rcp : DefaultAttrsIntrinsic<
- [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
+ [llvm_anyfloat_ty], [LLVMMatchType<0>]
>;
def int_amdgcn_rcp_legacy : ClangBuiltin<"__builtin_amdgcn_rcp_legacy">,
- DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty],
- [IntrNoMem, IntrSpeculatable]
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty]
>;
def int_amdgcn_sqrt : DefaultAttrsIntrinsic<
- [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
+ [llvm_anyfloat_ty], [LLVMMatchType<0>]
>;
def int_amdgcn_rsq : DefaultAttrsIntrinsic<
- [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
+ [llvm_anyfloat_ty], [LLVMMatchType<0>]
>;
def int_amdgcn_rsq_legacy : ClangBuiltin<"__builtin_amdgcn_rsq_legacy">,
DefaultAttrsIntrinsic<
- [llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]
+ [llvm_float_ty], [llvm_float_ty]
>;
// out = 1.0 / sqrt(a) result clamped to +/- max_float.
def int_amdgcn_rsq_clamp : DefaultAttrsIntrinsic<
- [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]>;
+ [llvm_anyfloat_ty], [LLVMMatchType<0>]
+>;
def int_amdgcn_frexp_mant : DefaultAttrsIntrinsic<
- [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
+ [llvm_anyfloat_ty], [LLVMMatchType<0>]
>;
def int_amdgcn_frexp_exp : DefaultAttrsIntrinsic<
- [llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem, IntrSpeculatable]
+ [llvm_anyint_ty], [llvm_anyfloat_ty]
>;
// v_fract is buggy on SI/CI. It mishandles infinities, may return 1.0
// and always uses rtz, so is not suitable for implementing the OpenCL
// fract function. It should be ok on VI.
def int_amdgcn_fract : DefaultAttrsIntrinsic<
- [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
+ [llvm_anyfloat_ty], [LLVMMatchType<0>]
>;
def int_amdgcn_cvt_pkrtz : ClangBuiltin<"__builtin_amdgcn_cvt_pkrtz">,
- DefaultAttrsIntrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, IntrSpeculatable]
+ DefaultAttrsIntrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty]
>;
def int_amdgcn_cvt_pknorm_i16 :
ClangBuiltin<"__builtin_amdgcn_cvt_pknorm_i16">,
- DefaultAttrsIntrinsic<[llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, IntrSpeculatable]
+ DefaultAttrsIntrinsic<[llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty]
>;
def int_amdgcn_cvt_pknorm_u16 :
ClangBuiltin<"__builtin_amdgcn_cvt_pknorm_u16">,
- DefaultAttrsIntrinsic<[llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, IntrSpeculatable]
+ DefaultAttrsIntrinsic<[llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty]
>;
def int_amdgcn_cvt_pk_i16 :
ClangBuiltin<"__builtin_amdgcn_cvt_pk_i16">,
DefaultAttrsIntrinsic<
- [llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, IntrSpeculatable]
+ [llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty]
>;
def int_amdgcn_cvt_pk_u16 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_u16">,
- DefaultAttrsIntrinsic<[llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, IntrSpeculatable]
+ DefaultAttrsIntrinsic<[llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty]
>;
def int_amdgcn_class : DefaultAttrsIntrinsic<
- [llvm_i1_ty], [llvm_anyfloat_ty, llvm_i32_ty],
- [IntrNoMem, IntrSpeculatable]
+ [llvm_i1_ty], [llvm_anyfloat_ty, llvm_i32_ty]
>;
def int_amdgcn_fmed3 :
DefaultAttrsIntrinsic<[llvm_anyfloat_ty],
- [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
- [IntrNoMem, IntrSpeculatable]
+ [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]
>;
def int_amdgcn_cubeid : ClangBuiltin<"__builtin_amdgcn_cubeid">,
DefaultAttrsIntrinsic<[llvm_float_ty],
- [llvm_float_ty, llvm_float_ty, llvm_float_ty],
- [IntrNoMem, IntrSpeculatable]
+ [llvm_float_ty, llvm_float_ty, llvm_float_ty]
>;
def int_amdgcn_cubema : ClangBuiltin<"__builtin_amdgcn_cubema">,
DefaultAttrsIntrinsic<[llvm_float_ty],
- [llvm_float_ty, llvm_float_ty, llvm_float_ty],
- [IntrNoMem, IntrSpeculatable]
+ [llvm_float_ty, llvm_float_ty, llvm_float_ty]
>;
def int_amdgcn_cubesc : ClangBuiltin<"__builtin_amdgcn_cubesc">,
DefaultAttrsIntrinsic<[llvm_float_ty],
- [llvm_float_ty, llvm_float_ty, llvm_float_ty],
- [IntrNoMem, IntrSpeculatable]
+ [llvm_float_ty, llvm_float_ty, llvm_float_ty]
>;
def int_amdgcn_cubetc : ClangBuiltin<"__builtin_amdgcn_cubetc">,
DefaultAttrsIntrinsic<[llvm_float_ty],
- [llvm_float_ty, llvm_float_ty, llvm_float_ty],
- [IntrNoMem, IntrSpeculatable]
+ [llvm_float_ty, llvm_float_ty, llvm_float_ty]
>;
// v_ffbh_i32, as opposed to v_ffbh_u32. For v_ffbh_u32, llvm.ctlz
// should be used.
def int_amdgcn_sffbh :
- DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>],
- [IntrNoMem, IntrSpeculatable]
+ DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]
>;
// v_mad_f32|f16/v_mac_f32|f16, selected regardless of denorm support.
def int_amdgcn_fmad_ftz :
DefaultAttrsIntrinsic<[llvm_anyfloat_ty],
- [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
- [IntrNoMem, IntrSpeculatable]
+ [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]
+>;
+
+def int_amdgcn_tanh : DefaultAttrsIntrinsic<
+ [llvm_anyfloat_ty], [LLVMMatchType<0>]
+>;
+
+def int_amdgcn_cvt_sr_pk_f16_f32 : DefaultAttrsIntrinsic<
+ [llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty, llvm_i32_ty]
+>, ClangBuiltin<"__builtin_amdgcn_cvt_sr_pk_f16_f32">;
+
+def int_amdgcn_cvt_sr_pk_bf16_f32 : DefaultAttrsIntrinsic<
+ [llvm_v2bf16_ty], [llvm_float_ty, llvm_float_ty, llvm_i32_ty]
+>, ClangBuiltin<"__builtin_amdgcn_cvt_sr_pk_bf16_f32">;
+
+def int_amdgcn_cvt_pk_f16_fp8 : DefaultAttrsIntrinsic<
+ [llvm_v2f16_ty], [llvm_i16_ty]
+>, ClangBuiltin<"__builtin_amdgcn_cvt_pk_f16_fp8">;
+
+def int_amdgcn_cvt_pk_f16_bf8 : DefaultAttrsIntrinsic<
+ [llvm_v2f16_ty], [llvm_i16_ty]
+>, ClangBuiltin<"__builtin_amdgcn_cvt_pk_f16_bf8">;
+
+def int_amdgcn_cvt_pk_fp8_f16
+ : DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_v2f16_ty]>,
+ ClangBuiltin<"__builtin_amdgcn_cvt_pk_fp8_f16">;
+
+def int_amdgcn_cvt_pk_bf8_f16
+ : DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_v2f16_ty]>,
+ ClangBuiltin<"__builtin_amdgcn_cvt_pk_bf8_f16">;
+
+} // end let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison]
+
+def int_amdgcn_fmul_legacy : ClangBuiltin<"__builtin_amdgcn_fmul_legacy">,
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative, IntrNoCreateUndefOrPoison]
+>;
+
+// Fused single-precision multiply-add with legacy behaviour for the multiply,
+// which is that +/- 0.0 * anything (even NaN or infinity) is +0.0. This is
+// intended for use on subtargets that have the v_fma_legacy_f32 and/or
+// v_fmac_legacy_f32 instructions. (Note that v_fma_legacy_f16 is unrelated and
+// has a completely different kind of legacy behaviour.)
+def int_amdgcn_fma_legacy :
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative, IntrNoCreateUndefOrPoison]
>;
// FIXME: The m0 argument should be moved after the normal arguments
@@ -619,61 +635,37 @@ def int_amdgcn_ds_ordered_swap : AMDGPUDSOrderedIntrinsic;
def int_amdgcn_ds_append : AMDGPUDSAppendConsumedIntrinsic;
def int_amdgcn_ds_consume : AMDGPUDSAppendConsumedIntrinsic;
-def int_amdgcn_tanh : DefaultAttrsIntrinsic<
- [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
->;
-
-def int_amdgcn_cvt_sr_pk_f16_f32 : DefaultAttrsIntrinsic<
- [llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable]
->, ClangBuiltin<"__builtin_amdgcn_cvt_sr_pk_f16_f32">;
-
-def int_amdgcn_cvt_sr_pk_bf16_f32 : DefaultAttrsIntrinsic<
- [llvm_v2bf16_ty], [llvm_float_ty, llvm_float_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable]
->, ClangBuiltin<"__builtin_amdgcn_cvt_sr_pk_bf16_f32">;
-
-def int_amdgcn_cvt_pk_f16_fp8 : DefaultAttrsIntrinsic<
- [llvm_v2f16_ty], [llvm_i16_ty], [IntrNoMem, IntrSpeculatable]
->, ClangBuiltin<"__builtin_amdgcn_cvt_pk_f16_fp8">;
-
-def int_amdgcn_cvt_pk_f16_bf8 : DefaultAttrsIntrinsic<
- [llvm_v2f16_ty], [llvm_i16_ty], [IntrNoMem, IntrSpeculatable]
->, ClangBuiltin<"__builtin_amdgcn_cvt_pk_f16_bf8">;
-
-def int_amdgcn_cvt_pk_fp8_f16
- : DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_v2f16_ty],
- [IntrNoMem, IntrSpeculatable]>,
- ClangBuiltin<"__builtin_amdgcn_cvt_pk_fp8_f16">;
-
-def int_amdgcn_cvt_pk_bf8_f16
- : DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_v2f16_ty],
- [IntrNoMem, IntrSpeculatable]>,
- ClangBuiltin<"__builtin_amdgcn_cvt_pk_bf8_f16">;
-
// llvm.amdgcn.cvt.sr.fp8.f16 i32 vdst, half src, i32 seed, i32 old, imm byte_sel [0..3]
// byte_sel selects byte to write in vdst.
def int_amdgcn_cvt_sr_fp8_f16 : DefaultAttrsIntrinsic<
[llvm_i32_ty], [llvm_half_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<3>>]
+ [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison,
+ ImmArg<ArgIndex<3>>, Range<ArgIndex<3>, 0, 4>]
>, ClangBuiltin<"__builtin_amdgcn_cvt_sr_fp8_f16">;
// llvm.amdgcn.cvt.sr.bf8.f16 i32 vdst, half src, i32 seed, i32 old, imm byte_sel [0..3]
// byte_sel selects byte to write in vdst.
def int_amdgcn_cvt_sr_bf8_f16 : DefaultAttrsIntrinsic<
[llvm_i32_ty], [llvm_half_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<3>>]
+ [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison,
+ ImmArg<ArgIndex<3>>, Range<ArgIndex<3>, 0, 4>]
>, ClangBuiltin<"__builtin_amdgcn_cvt_sr_bf8_f16">;
// llvm.amdgcn.cvt.scale.pk32.f16.bf6 v32f16 vdst, v6i32 src0, i32 scale_sel [0..15]
class AMDGPUCvtScaleIntrinsic<LLVMType DstTy, LLVMType Src0Ty, string name> : DefaultAttrsIntrinsic<
- [DstTy], [Src0Ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<2>>]
+ [DstTy], [Src0Ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison,
+ ImmArg<ArgIndex<2>>, Range<ArgIndex<2>, 0, 16>]
>, ClangBuiltin<"__builtin_amdgcn_"#name>;
class AMDGPUCvtScaleF32Intrinsic<LLVMType DstTy, LLVMType Src0Ty, string name> : DefaultAttrsIntrinsic<
- [DstTy], [Src0Ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable]
+ [DstTy], [Src0Ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison]
>, ClangBuiltin<"__builtin_amdgcn_"#name>;
class AMDGPUCvtScaleF32SRIntrinsic<LLVMType DstTy, LLVMType Src0Ty, string name> : DefaultAttrsIntrinsic<
- [DstTy], [Src0Ty, llvm_i32_ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable]
+ [DstTy], [Src0Ty, llvm_i32_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison]
>, ClangBuiltin<"__builtin_amdgcn_"#name>;
def int_amdgcn_cvt_scale_pk8_f16_fp8 : AMDGPUCvtScaleIntrinsic<llvm_v8f16_ty, llvm_v2i32_ty, "cvt_scale_pk8_f16_fp8">;
@@ -693,7 +685,8 @@ def int_amdgcn_cvt_scale_pk16_f32_fp6 : AMDGPUCvtScaleIntrinsic<llvm_v16f32_ty,
def int_amdgcn_cvt_scale_pk16_f32_bf6 : AMDGPUCvtScaleIntrinsic<llvm_v16f32_ty, llvm_v3i32_ty, "cvt_scale_pk16_f32_bf6">;
class AMDGPUCvtScaleF32ToFP6BF6Intrinsic<LLVMType DstTy, LLVMType Src0Ty, LLVMType Src1Ty, string name> : DefaultAttrsIntrinsic<
- [DstTy], [Src0Ty, Src1Ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable]
+ [DstTy], [Src0Ty, Src1Ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison]
>, ClangBuiltin<"__builtin_amdgcn_"#name>;
def int_amdgcn_cvt_scalef32_pk32_fp6_f16 : AMDGPUCvtScaleF32Intrinsic<llvm_v6i32_ty, llvm_v32f16_ty, "cvt_scalef32_pk32_fp6_f16">;
@@ -746,7 +739,8 @@ class AMDGPUCvtScaleFP4FP8BF8ToF1632Intrinsic<LLVMType DstTy, string name> : Def
[llvm_i32_ty, // src
llvm_float_ty, // scale
llvm_i32_ty], // src_sel index [0..3]
- [IntrNoMem, ImmArg<ArgIndex<2>>]
+ [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison,
+ ImmArg<ArgIndex<2>>, Range<ArgIndex<2>, 0, 4>]
>, ClangBuiltin<"__builtin_amdgcn_"#name>;
class AMDGPUCvtScale_pk_FromFP8BF8Intrinsic<LLVMType DstTy, string name> : DefaultAttrsIntrinsic<
@@ -754,7 +748,7 @@ class AMDGPUCvtScale_pk_FromFP8BF8Intrinsic<LLVMType DstTy, string name> : Defau
[llvm_i32_ty, // src
llvm_float_ty, // scale
llvm_i1_ty], // src_lo_hi_sel[true false]
- [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<2>>]
+ [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison, ImmArg<ArgIndex<2>>]
>, ClangBuiltin<"__builtin_amdgcn_"#name>;
class AMDGPUCvtScaleF16BF16ToFP8BF8TiedInputIntrinsic<LLVMType SrcTy, string name> : DefaultAttrsIntrinsic<
@@ -763,7 +757,7 @@ class AMDGPUCvtScaleF16BF16ToFP8BF8TiedInputIntrinsic<LLVMType SrcTy, string nam
SrcTy, // src
llvm_float_ty, // scale
llvm_i1_ty], // dst_lo_hi_sel[true false]
- [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<3>>]
+ [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison, ImmArg<ArgIndex<3>>]
>, ClangBuiltin<"__builtin_amdgcn_"#name>;
class AMDGPUCvtScaleF32ToFP8BF8TiedInputIntrinsic<string name> : DefaultAttrsIntrinsic<
@@ -773,7 +767,7 @@ class AMDGPUCvtScaleF32ToFP8BF8TiedInputIntrinsic<string name> : DefaultAttrsInt
llvm_float_ty, // src1
llvm_float_ty, // scale
llvm_i1_ty], // dst_lo_hi_sel[true false]
- [IntrNoMem, ImmArg<ArgIndex<4>>]
+ [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison, ImmArg<ArgIndex<4>>]
>, ClangBuiltin<"__builtin_amdgcn_"#name>;
class AMDGPUCvtScaleFP8BF8ToF16TiedInputIntrinsic<LLVMType DstTy, string name> : DefaultAttrsIntrinsic<
@@ -783,7 +777,8 @@ class AMDGPUCvtScaleFP8BF8ToF16TiedInputIntrinsic<LLVMType DstTy, string name> :
llvm_float_ty, // scale
llvm_i32_ty, // src_sel_index[0..3]
llvm_i1_ty], // dst_lo_hi_sel[true false]
- [IntrNoMem, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]
+ [IntrNoMem, IntrNoCreateUndefOrPoison,
+ ImmArg<ArgIndex<3>>, Range<ArgIndex<3>, 0, 4>, ImmArg<ArgIndex<4>>]
>, ClangBuiltin<"__builtin_amdgcn_"#name>;
class AMDGPUCvtScaleF32ToFP4Intrinsic<string name> : DefaultAttrsIntrinsic<
@@ -793,7 +788,8 @@ class AMDGPUCvtScaleF32ToFP4Intrinsic<string name> : DefaultAttrsIntrinsic<
llvm_float_ty, // src1
llvm_float_ty, // scale
llvm_i32_ty], // dst_sel_index[0..3]
- [IntrNoMem, ImmArg<ArgIndex<4>>]
+ [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison,
+ ImmArg<ArgIndex<4>>, Range<ArgIndex<4>, 0, 4>]
>, ClangBuiltin<"__builtin_amdgcn_"#name>;
class AMDGPUCvtScaleF16ToFP4TiedInputIntrinsic<LLVMType SrcTy, string name> : DefaultAttrsIntrinsic<
@@ -802,7 +798,8 @@ class AMDGPUCvtScaleF16ToFP4TiedInputIntrinsic<LLVMType SrcTy, string name> : De
SrcTy, // src
llvm_float_ty, // scale
llvm_i32_ty], // dest_sel_index [0..3]
- [IntrNoMem, ImmArg<ArgIndex<3>>]
+ [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison,
+ ImmArg<ArgIndex<3>>, Range<ArgIndex<3>, 0, 4>]
>, ClangBuiltin<"__builtin_amdgcn_"#name>;
class AMDGPUCvtScaleBF16F16F32SRToFP4BF8F8TiedInputIntrinsic<LLVMType Src0Ty, string name> : DefaultAttrsIntrinsic<
@@ -812,7 +809,8 @@ class AMDGPUCvtScaleBF16F16F32SRToFP4BF8F8TiedInputIntrinsic<LLVMType Src0Ty, st
llvm_i32_ty, // seed
llvm_float_ty, // scale
llvm_i32_ty], // dst_sel_index[0..3]
- [IntrNoMem, ImmArg<ArgIndex<4>>]
+ [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison,
+ ImmArg<ArgIndex<4>>, Range<ArgIndex<4>, 0, 4>]
>, ClangBuiltin<"__builtin_amdgcn_"#name>;
class AMDGPUCvtScaleSRF32ToBF16F16TiedInputIntrinsic<LLVMType DstTy, string name> : DefaultAttrsIntrinsic<
@@ -821,7 +819,7 @@ class AMDGPUCvtScaleSRF32ToBF16F16TiedInputIntrinsic<LLVMType DstTy, string name
llvm_float_ty, // src0
llvm_i32_ty, // seed
llvm_i1_ty], // dst_lo_hi_sel[true false]
- [IntrNoMem, ImmArg<ArgIndex<3>>]
+ [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison, ImmArg<ArgIndex<3>>]
>, ClangBuiltin<"__builtin_amdgcn_"#name>;
def int_amdgcn_cvt_sr_bf16_f32: AMDGPUCvtScaleSRF32ToBF16F16TiedInputIntrinsic<llvm_v2bf16_ty, "cvt_sr_bf16_f32">;
@@ -890,13 +888,13 @@ def int_amdgcn_cvt_scalef32_sr_fp8_f16: AMDGPUCvtScaleBF16F16F32SRToFP4BF8F8Tied
def int_amdgcn_cvt_scalef32_sr_fp8_f32: AMDGPUCvtScaleBF16F16F32SRToFP4BF8F8TiedInputIntrinsic<llvm_float_ty, "cvt_scalef32_sr_fp8_f32">;
def int_amdgcn_prng_b32 : DefaultAttrsIntrinsic<
- [llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]
+ [llvm_i32_ty], [llvm_i32_ty], [IntrNoMem, IntrNoCreateUndefOrPoison]
>, ClangBuiltin<"__builtin_amdgcn_prng_b32">;
def int_amdgcn_bitop3 :
DefaultAttrsIntrinsic<[llvm_anyint_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
- [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<3>>]>;
+ [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison, ImmArg<ArgIndex<3>>]>;
} // TargetPrefix = "amdgcn"
@@ -1435,7 +1...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This probably should go in DefaultAttrsIntrinsic. With the exception of the intrinsics with the explicit flags for the poison generating behavior, this applies to nearly everything.
|
Re general applicability ... I'm not so sure about that? A load, for example, can return a poison, no? |
Doesn't matter, intrinsics have to explicitly opt-in to use the default attributes. This is a majority vote question among existing intrinsics. |
This commit goes through IntrinsicsAMDGPU.td and adds
`nocreateundeforpoison` to intrinsics that (to my knowledge) perform arithmetic operations that are defined everywhere (so no bitfield extracts and such since those can have invalid inputs, and similarly for permutations). To achieve this, the byte selector arguments on some of the small-float conversion intrinsics have been given range() annotations so that values not in [0, 3] are a verifier error as the documentation says they should be.