[AMDGPU] Add `nocreateundeforpoison` annotations #166450

krzysz00 · 2025-11-04T21:41:32Z

This commit goes through IntrinsicsAMDGPU.td and adds nocreateundeforpoison to intrinsics that (to my knowledge) perform arithmetic operations that are defined everywhere (so no bitfield extracts and such since those can have invalid inputs, and similarly for permutations).

To achieve this, the byte selector arguments on some of the small-float conversion intrinsics have been given range() annotations so that values not in [0, 3] are a verifier error as the documentation says they should be.

This commit goes through IntrinsicsAMDGPU.td and adds `nocreateundeforpoison` to intrinsics that (to my knowledge) perform arithmetic operations that are defined everywhere (so no bitfield extracts and such since those can have invalid inputs, and similarly for permutations). To achieve this, the byte selector arguments on some of the small-float conversion intrinsics have been given range() annotations so that values not in [0, 3] are a verifier error as the documentation says they should be.

llvmbot · 2025-11-04T21:42:02Z

@llvm/pr-subscribers-llvm-ir

@llvm/pr-subscribers-backend-amdgpu

Author: Krzysztof Drewniak (krzysz00)

Changes

This commit goes through IntrinsicsAMDGPU.td and adds nocreateundeforpoison to intrinsics that (to my knowledge) perform arithmetic operations that are defined everywhere (so no bitfield extracts and such since those can have invalid inputs, and similarly for permutations).

To achieve this, the byte selector arguments on some of the small-float conversion intrinsics have been given range() annotations so that values not in [0, 3] are a verifier error as the documentation says they should be.

Patch is 71.87 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/166450.diff

13 Files Affected:

(modified) llvm/include/llvm/IR/IntrinsicsAMDGPU.td (+178-174)
(modified) llvm/test/CodeGen/AMDGPU/amdgpu-attributor-min-agpr-alloc.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-dynamic-indirect-access-asan.ll (+5-3)
(modified) llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-dynamic-lds-test-asan.ll (+4-3)
(modified) llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-dynamic-indirect-access-asan.ll (+5-3)
(modified) llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-dynamic-lds-test-asan.ll (+7-6)
(modified) llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-asan.ll (+5-3)
(modified) llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-function-param-asan.ll (+9-7)
(modified) llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-no-heap-ptr.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-asan.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-atomic-cmpxchg-asan.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-atomicrmw-asan.ll (+1-1)
(modified) llvm/test/Instrumentation/AddressSanitizer/asan-pass-second-run.ll (+6-3)

diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 8e35109061792..55a1272d58cb9 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -398,32 +398,31 @@ def int_amdgcn_div_scale : DefaultAttrsIntrinsic<
   //                (0 = Denominator, 1 = Numerator).
   [llvm_anyfloat_ty, llvm_i1_ty],
   [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty],
-  [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<2>>]
+  [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison, ImmArg<ArgIndex<2>>]
 >;
 
+// Floating-point arithmetic intrinsics (and integer conversions) not handled
+// elsewhere.
+let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison] in {
+
 def int_amdgcn_div_fmas : DefaultAttrsIntrinsic<[llvm_anyfloat_ty],
-  [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty],
-  [IntrNoMem, IntrSpeculatable]
+  [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty]
 >;
 
 def int_amdgcn_div_fixup : DefaultAttrsIntrinsic<[llvm_anyfloat_ty],
-  [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
-  [IntrNoMem, IntrSpeculatable]
->;
+  [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>;
 
 // Look Up 2.0 / pi src0 with segment select src1[4:0]
 def int_amdgcn_trig_preop : DefaultAttrsIntrinsic<
-  [llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty],
-  [IntrNoMem, IntrSpeculatable]
+  [llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty]
 >;
 
 def int_amdgcn_sin : DefaultAttrsIntrinsic<
-  [llvm_anyfloat_ty], [LLVMMatchType<0>],
-  [IntrNoMem, IntrSpeculatable]
+  [llvm_anyfloat_ty], [LLVMMatchType<0>]
 >;
 
 def int_amdgcn_cos : DefaultAttrsIntrinsic<
-  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
+  [llvm_anyfloat_ty], [LLVMMatchType<0>]
 >;
 
 // v_log_{f16|f32}, performs log2. f32 version does not handle
@@ -431,7 +430,7 @@ def int_amdgcn_cos : DefaultAttrsIntrinsic<
 // support denormals, and the generic log2 intrinsic should be
 // preferred.
 def int_amdgcn_log : DefaultAttrsIntrinsic<
-  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
+  [llvm_anyfloat_ty], [LLVMMatchType<0>]
 >;
 
 // v_exp_{f16|f32} (int_amdgcn_exp was taken by export
@@ -440,145 +439,162 @@ def int_amdgcn_log : DefaultAttrsIntrinsic<
 // support denormals, and the generic exp2 intrinsic should be
 // preferred.
 def int_amdgcn_exp2 : DefaultAttrsIntrinsic<
-  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
+  [llvm_anyfloat_ty], [LLVMMatchType<0>]
 >;
 
 def int_amdgcn_log_clamp : DefaultAttrsIntrinsic<
-  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
->;
-
-def int_amdgcn_fmul_legacy : ClangBuiltin<"__builtin_amdgcn_fmul_legacy">,
-  DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
-  [IntrNoMem, IntrSpeculatable, Commutative]
->;
-
-// Fused single-precision multiply-add with legacy behaviour for the multiply,
-// which is that +/- 0.0 * anything (even NaN or infinity) is +0.0. This is
-// intended for use on subtargets that have the v_fma_legacy_f32 and/or
-// v_fmac_legacy_f32 instructions. (Note that v_fma_legacy_f16 is unrelated and
-// has a completely different kind of legacy behaviour.)
-def int_amdgcn_fma_legacy :
-  DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
-  [IntrNoMem, IntrSpeculatable, Commutative]
+  [llvm_anyfloat_ty], [LLVMMatchType<0>]
 >;
 
 def int_amdgcn_rcp : DefaultAttrsIntrinsic<
-  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
+  [llvm_anyfloat_ty], [LLVMMatchType<0>]
 >;
 
 def int_amdgcn_rcp_legacy : ClangBuiltin<"__builtin_amdgcn_rcp_legacy">,
-  DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty],
-  [IntrNoMem, IntrSpeculatable]
+  DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty]
 >;
 
 def int_amdgcn_sqrt :  DefaultAttrsIntrinsic<
-  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
+  [llvm_anyfloat_ty], [LLVMMatchType<0>]
 >;
 
 def int_amdgcn_rsq :  DefaultAttrsIntrinsic<
-  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
+  [llvm_anyfloat_ty], [LLVMMatchType<0>]
 >;
 
 def int_amdgcn_rsq_legacy :  ClangBuiltin<"__builtin_amdgcn_rsq_legacy">,
   DefaultAttrsIntrinsic<
-  [llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]
+  [llvm_float_ty], [llvm_float_ty]
 >;
 
 // out = 1.0 / sqrt(a) result clamped to +/- max_float.
 def int_amdgcn_rsq_clamp : DefaultAttrsIntrinsic<
-  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]>;
+  [llvm_anyfloat_ty], [LLVMMatchType<0>]
+>;
 
 def int_amdgcn_frexp_mant : DefaultAttrsIntrinsic<
-  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
+  [llvm_anyfloat_ty], [LLVMMatchType<0>]
 >;
 
 def int_amdgcn_frexp_exp : DefaultAttrsIntrinsic<
-  [llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem, IntrSpeculatable]
+  [llvm_anyint_ty], [llvm_anyfloat_ty]
 >;
 
 // v_fract is buggy on SI/CI. It mishandles infinities, may return 1.0
 // and always uses rtz, so is not suitable for implementing the OpenCL
 // fract function. It should be ok on VI.
 def int_amdgcn_fract : DefaultAttrsIntrinsic<
-  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
+  [llvm_anyfloat_ty], [LLVMMatchType<0>]
 >;
 
 def int_amdgcn_cvt_pkrtz : ClangBuiltin<"__builtin_amdgcn_cvt_pkrtz">,
-  DefaultAttrsIntrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty],
-            [IntrNoMem, IntrSpeculatable]
+  DefaultAttrsIntrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty]
 >;
 
 def int_amdgcn_cvt_pknorm_i16 :
   ClangBuiltin<"__builtin_amdgcn_cvt_pknorm_i16">,
-  DefaultAttrsIntrinsic<[llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty],
-            [IntrNoMem, IntrSpeculatable]
+  DefaultAttrsIntrinsic<[llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty]
 >;
 
 def int_amdgcn_cvt_pknorm_u16 :
   ClangBuiltin<"__builtin_amdgcn_cvt_pknorm_u16">,
-  DefaultAttrsIntrinsic<[llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty],
-            [IntrNoMem, IntrSpeculatable]
+  DefaultAttrsIntrinsic<[llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty]
 >;
 
 def int_amdgcn_cvt_pk_i16 :
     ClangBuiltin<"__builtin_amdgcn_cvt_pk_i16">,
     DefaultAttrsIntrinsic<
-  [llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty],
-  [IntrNoMem, IntrSpeculatable]
+  [llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty]
 >;
 
 def int_amdgcn_cvt_pk_u16 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_u16">,
-  DefaultAttrsIntrinsic<[llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty],
-    [IntrNoMem, IntrSpeculatable]
+  DefaultAttrsIntrinsic<[llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty]
 >;
 
 def int_amdgcn_class : DefaultAttrsIntrinsic<
-  [llvm_i1_ty], [llvm_anyfloat_ty, llvm_i32_ty],
-  [IntrNoMem, IntrSpeculatable]
+  [llvm_i1_ty], [llvm_anyfloat_ty, llvm_i32_ty]
 >;
 
 def int_amdgcn_fmed3 :
   DefaultAttrsIntrinsic<[llvm_anyfloat_ty],
-    [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
-    [IntrNoMem, IntrSpeculatable]
+    [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]
 >;
 
 def int_amdgcn_cubeid : ClangBuiltin<"__builtin_amdgcn_cubeid">,
   DefaultAttrsIntrinsic<[llvm_float_ty],
-    [llvm_float_ty, llvm_float_ty, llvm_float_ty],
-    [IntrNoMem, IntrSpeculatable]
+    [llvm_float_ty, llvm_float_ty, llvm_float_ty]
 >;
 
 def int_amdgcn_cubema : ClangBuiltin<"__builtin_amdgcn_cubema">,
   DefaultAttrsIntrinsic<[llvm_float_ty],
-  [llvm_float_ty, llvm_float_ty, llvm_float_ty],
-  [IntrNoMem, IntrSpeculatable]
+  [llvm_float_ty, llvm_float_ty, llvm_float_ty]
 >;
 
 def int_amdgcn_cubesc : ClangBuiltin<"__builtin_amdgcn_cubesc">,
   DefaultAttrsIntrinsic<[llvm_float_ty],
-    [llvm_float_ty, llvm_float_ty, llvm_float_ty],
-    [IntrNoMem, IntrSpeculatable]
+    [llvm_float_ty, llvm_float_ty, llvm_float_ty]
 >;
 
 def int_amdgcn_cubetc : ClangBuiltin<"__builtin_amdgcn_cubetc">,
   DefaultAttrsIntrinsic<[llvm_float_ty],
-    [llvm_float_ty, llvm_float_ty, llvm_float_ty],
-    [IntrNoMem, IntrSpeculatable]
+    [llvm_float_ty, llvm_float_ty, llvm_float_ty]
 >;
 
 // v_ffbh_i32, as opposed to v_ffbh_u32. For v_ffbh_u32, llvm.ctlz
 // should be used.
 def int_amdgcn_sffbh :
-  DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>],
-  [IntrNoMem, IntrSpeculatable]
+  DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]
 >;
 
 // v_mad_f32|f16/v_mac_f32|f16, selected regardless of denorm support.
 def int_amdgcn_fmad_ftz :
   DefaultAttrsIntrinsic<[llvm_anyfloat_ty],
-            [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
-            [IntrNoMem, IntrSpeculatable]
+            [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]
+>;
+
+def int_amdgcn_tanh : DefaultAttrsIntrinsic<
+  [llvm_anyfloat_ty], [LLVMMatchType<0>]
+>;
+
+def int_amdgcn_cvt_sr_pk_f16_f32 : DefaultAttrsIntrinsic<
+  [llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty, llvm_i32_ty]
+>, ClangBuiltin<"__builtin_amdgcn_cvt_sr_pk_f16_f32">;
+
+def int_amdgcn_cvt_sr_pk_bf16_f32 : DefaultAttrsIntrinsic<
+  [llvm_v2bf16_ty], [llvm_float_ty, llvm_float_ty, llvm_i32_ty]
+>, ClangBuiltin<"__builtin_amdgcn_cvt_sr_pk_bf16_f32">;
+
+def int_amdgcn_cvt_pk_f16_fp8 : DefaultAttrsIntrinsic<
+  [llvm_v2f16_ty], [llvm_i16_ty]
+>, ClangBuiltin<"__builtin_amdgcn_cvt_pk_f16_fp8">;
+
+def int_amdgcn_cvt_pk_f16_bf8 : DefaultAttrsIntrinsic<
+  [llvm_v2f16_ty], [llvm_i16_ty]
+>, ClangBuiltin<"__builtin_amdgcn_cvt_pk_f16_bf8">;
+
+def int_amdgcn_cvt_pk_fp8_f16
+    : DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_v2f16_ty]>,
+      ClangBuiltin<"__builtin_amdgcn_cvt_pk_fp8_f16">;
+
+def int_amdgcn_cvt_pk_bf8_f16
+    : DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_v2f16_ty]>,
+      ClangBuiltin<"__builtin_amdgcn_cvt_pk_bf8_f16">;
+
+} // end let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison]
+
+def int_amdgcn_fmul_legacy : ClangBuiltin<"__builtin_amdgcn_fmul_legacy">,
+  DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+  [IntrNoMem, IntrSpeculatable, Commutative, IntrNoCreateUndefOrPoison]
+>;
+
+// Fused single-precision multiply-add with legacy behaviour for the multiply,
+// which is that +/- 0.0 * anything (even NaN or infinity) is +0.0. This is
+// intended for use on subtargets that have the v_fma_legacy_f32 and/or
+// v_fmac_legacy_f32 instructions. (Note that v_fma_legacy_f16 is unrelated and
+// has a completely different kind of legacy behaviour.)
+def int_amdgcn_fma_legacy :
+  DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
+  [IntrNoMem, IntrSpeculatable, Commutative, IntrNoCreateUndefOrPoison]
 >;
 
 // FIXME: The m0 argument should be moved after the normal arguments
@@ -619,61 +635,37 @@ def int_amdgcn_ds_ordered_swap : AMDGPUDSOrderedIntrinsic;
 def int_amdgcn_ds_append : AMDGPUDSAppendConsumedIntrinsic;
 def int_amdgcn_ds_consume : AMDGPUDSAppendConsumedIntrinsic;
 
-def int_amdgcn_tanh : DefaultAttrsIntrinsic<
-  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
->;
-
-def int_amdgcn_cvt_sr_pk_f16_f32 : DefaultAttrsIntrinsic<
-  [llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable]
->, ClangBuiltin<"__builtin_amdgcn_cvt_sr_pk_f16_f32">;
-
-def int_amdgcn_cvt_sr_pk_bf16_f32 : DefaultAttrsIntrinsic<
-  [llvm_v2bf16_ty], [llvm_float_ty, llvm_float_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable]
->, ClangBuiltin<"__builtin_amdgcn_cvt_sr_pk_bf16_f32">;
-
-def int_amdgcn_cvt_pk_f16_fp8 : DefaultAttrsIntrinsic<
-  [llvm_v2f16_ty], [llvm_i16_ty], [IntrNoMem, IntrSpeculatable]
->, ClangBuiltin<"__builtin_amdgcn_cvt_pk_f16_fp8">;
-
-def int_amdgcn_cvt_pk_f16_bf8 : DefaultAttrsIntrinsic<
-  [llvm_v2f16_ty], [llvm_i16_ty], [IntrNoMem, IntrSpeculatable]
->, ClangBuiltin<"__builtin_amdgcn_cvt_pk_f16_bf8">;
-
-def int_amdgcn_cvt_pk_fp8_f16
-    : DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_v2f16_ty],
-                            [IntrNoMem, IntrSpeculatable]>,
-      ClangBuiltin<"__builtin_amdgcn_cvt_pk_fp8_f16">;
-
-def int_amdgcn_cvt_pk_bf8_f16
-    : DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_v2f16_ty],
-                            [IntrNoMem, IntrSpeculatable]>,
-      ClangBuiltin<"__builtin_amdgcn_cvt_pk_bf8_f16">;
-
 // llvm.amdgcn.cvt.sr.fp8.f16 i32 vdst, half src, i32 seed, i32 old, imm byte_sel [0..3]
 // byte_sel selects byte to write in vdst.
 def int_amdgcn_cvt_sr_fp8_f16 : DefaultAttrsIntrinsic<
   [llvm_i32_ty], [llvm_half_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-  [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<3>>]
+  [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison,
+   ImmArg<ArgIndex<3>>, Range<ArgIndex<3>, 0, 4>]
 >, ClangBuiltin<"__builtin_amdgcn_cvt_sr_fp8_f16">;
 
 // llvm.amdgcn.cvt.sr.bf8.f16 i32 vdst, half src, i32 seed, i32 old, imm byte_sel [0..3]
 // byte_sel selects byte to write in vdst.
 def int_amdgcn_cvt_sr_bf8_f16 : DefaultAttrsIntrinsic<
   [llvm_i32_ty], [llvm_half_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-  [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<3>>]
+  [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison,
+   ImmArg<ArgIndex<3>>, Range<ArgIndex<3>, 0, 4>]
 >, ClangBuiltin<"__builtin_amdgcn_cvt_sr_bf8_f16">;
 
 // llvm.amdgcn.cvt.scale.pk32.f16.bf6 v32f16 vdst, v6i32 src0, i32 scale_sel [0..15]
 class AMDGPUCvtScaleIntrinsic<LLVMType DstTy, LLVMType Src0Ty, string name> : DefaultAttrsIntrinsic<
-  [DstTy], [Src0Ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<2>>]
+  [DstTy], [Src0Ty, llvm_i32_ty, llvm_i32_ty],
+  [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison,
+   ImmArg<ArgIndex<2>>, Range<ArgIndex<2>, 0, 16>]
 >, ClangBuiltin<"__builtin_amdgcn_"#name>;
 
 class AMDGPUCvtScaleF32Intrinsic<LLVMType DstTy, LLVMType Src0Ty, string name> : DefaultAttrsIntrinsic<
-  [DstTy], [Src0Ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable]
+  [DstTy], [Src0Ty, llvm_float_ty],
+  [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison]
 >, ClangBuiltin<"__builtin_amdgcn_"#name>;
 
 class AMDGPUCvtScaleF32SRIntrinsic<LLVMType DstTy, LLVMType Src0Ty, string name> : DefaultAttrsIntrinsic<
-  [DstTy], [Src0Ty, llvm_i32_ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable]
+  [DstTy], [Src0Ty, llvm_i32_ty, llvm_float_ty],
+  [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison]
 >, ClangBuiltin<"__builtin_amdgcn_"#name>;
 
 def int_amdgcn_cvt_scale_pk8_f16_fp8   : AMDGPUCvtScaleIntrinsic<llvm_v8f16_ty,   llvm_v2i32_ty, "cvt_scale_pk8_f16_fp8">;
@@ -693,7 +685,8 @@ def int_amdgcn_cvt_scale_pk16_f32_fp6  : AMDGPUCvtScaleIntrinsic<llvm_v16f32_ty,
 def int_amdgcn_cvt_scale_pk16_f32_bf6  : AMDGPUCvtScaleIntrinsic<llvm_v16f32_ty,  llvm_v3i32_ty, "cvt_scale_pk16_f32_bf6">;
 
 class AMDGPUCvtScaleF32ToFP6BF6Intrinsic<LLVMType DstTy, LLVMType Src0Ty, LLVMType Src1Ty, string name> : DefaultAttrsIntrinsic<
-  [DstTy], [Src0Ty, Src1Ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable]
+  [DstTy], [Src0Ty, Src1Ty, llvm_float_ty],
+  [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison]
 >, ClangBuiltin<"__builtin_amdgcn_"#name>;
 
 def int_amdgcn_cvt_scalef32_pk32_fp6_f16  : AMDGPUCvtScaleF32Intrinsic<llvm_v6i32_ty, llvm_v32f16_ty,  "cvt_scalef32_pk32_fp6_f16">;
@@ -746,7 +739,8 @@ class AMDGPUCvtScaleFP4FP8BF8ToF1632Intrinsic<LLVMType DstTy, string name> : Def
   [llvm_i32_ty,   // src
    llvm_float_ty, // scale
    llvm_i32_ty],  // src_sel index [0..3]
-  [IntrNoMem, ImmArg<ArgIndex<2>>]
+  [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison,
+   ImmArg<ArgIndex<2>>, Range<ArgIndex<2>, 0, 4>]
 >, ClangBuiltin<"__builtin_amdgcn_"#name>;
 
 class AMDGPUCvtScale_pk_FromFP8BF8Intrinsic<LLVMType DstTy, string name> : DefaultAttrsIntrinsic<
@@ -754,7 +748,7 @@ class AMDGPUCvtScale_pk_FromFP8BF8Intrinsic<LLVMType DstTy, string name> : Defau
   [llvm_i32_ty,   // src
    llvm_float_ty, // scale
    llvm_i1_ty],   // src_lo_hi_sel[true false]
-  [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<2>>]
+  [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison, ImmArg<ArgIndex<2>>]
 >, ClangBuiltin<"__builtin_amdgcn_"#name>;
 
 class AMDGPUCvtScaleF16BF16ToFP8BF8TiedInputIntrinsic<LLVMType SrcTy, string name> : DefaultAttrsIntrinsic<
@@ -763,7 +757,7 @@ class AMDGPUCvtScaleF16BF16ToFP8BF8TiedInputIntrinsic<LLVMType SrcTy, string nam
    SrcTy,         // src
    llvm_float_ty, // scale
    llvm_i1_ty],   // dst_lo_hi_sel[true false]
-  [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<3>>]
+  [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison, ImmArg<ArgIndex<3>>]
 >, ClangBuiltin<"__builtin_amdgcn_"#name>;
 
 class AMDGPUCvtScaleF32ToFP8BF8TiedInputIntrinsic<string name> : DefaultAttrsIntrinsic<
@@ -773,7 +767,7 @@ class AMDGPUCvtScaleF32ToFP8BF8TiedInputIntrinsic<string name> : DefaultAttrsInt
    llvm_float_ty, // src1
    llvm_float_ty, // scale
    llvm_i1_ty],   // dst_lo_hi_sel[true false]
-  [IntrNoMem, ImmArg<ArgIndex<4>>]
+  [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison, ImmArg<ArgIndex<4>>]
 >, ClangBuiltin<"__builtin_amdgcn_"#name>;
 
 class AMDGPUCvtScaleFP8BF8ToF16TiedInputIntrinsic<LLVMType DstTy, string name> : DefaultAttrsIntrinsic<
@@ -783,7 +777,8 @@ class AMDGPUCvtScaleFP8BF8ToF16TiedInputIntrinsic<LLVMType DstTy, string name> :
    llvm_float_ty, // scale
    llvm_i32_ty,   // src_sel_index[0..3]
    llvm_i1_ty],   // dst_lo_hi_sel[true false]
-  [IntrNoMem, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]
+  [IntrNoMem, IntrNoCreateUndefOrPoison,
+   ImmArg<ArgIndex<3>>, Range<ArgIndex<3>, 0, 4>, ImmArg<ArgIndex<4>>]
 >, ClangBuiltin<"__builtin_amdgcn_"#name>;
 
 class AMDGPUCvtScaleF32ToFP4Intrinsic<string name> : DefaultAttrsIntrinsic<
@@ -793,7 +788,8 @@ class AMDGPUCvtScaleF32ToFP4Intrinsic<string name> : DefaultAttrsIntrinsic<
    llvm_float_ty, // src1
    llvm_float_ty, // scale
    llvm_i32_ty],  // dst_sel_index[0..3]
-  [IntrNoMem, ImmArg<ArgIndex<4>>]
+  [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison,
+   ImmArg<ArgIndex<4>>, Range<ArgIndex<4>, 0, 4>]
 >, ClangBuiltin<"__builtin_amdgcn_"#name>;
 
 class AMDGPUCvtScaleF16ToFP4TiedInputIntrinsic<LLVMType SrcTy, string name> : DefaultAttrsIntrinsic<
@@ -802,7 +798,8 @@ class AMDGPUCvtScaleF16ToFP4TiedInputIntrinsic<LLVMType SrcTy, string name> : De
    SrcTy,         // src
    llvm_float_ty, // scale
    llvm_i32_ty],  // dest_sel_index [0..3]
-  [IntrNoMem, ImmArg<ArgIndex<3>>]
+  [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison,
+   ImmArg<ArgIndex<3>>, Range<ArgIndex<3>, 0, 4>]
 >, ClangBuiltin<"__builtin_amdgcn_"#name>;
 
 class AMDGPUCvtScaleBF16F16F32SRToFP4BF8F8TiedInputIntrinsic<LLVMType Src0Ty, string name> : DefaultAttrsIntrinsic<
@@ -812,7 +809,8 @@ class AMDGPUCvtScaleBF16F16F32SRToFP4BF8F8TiedInputIntrinsic<LLVMType Src0Ty, st
    llvm_i32_ty,   // seed
    llvm_float_ty, // scale
    llvm_i32_ty],  // dst_sel_index[0..3]
-  [IntrNoMem, ImmArg<ArgIndex<4>>]
+  [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison,
+   ImmArg<ArgIndex<4>>, Range<ArgIndex<4>, 0, 4>]
 >, ClangBuiltin<"__builtin_amdgcn_"#name>;
 
 class AMDGPUCvtScaleSRF32ToBF16F16TiedInputIntrinsic<LLVMType DstTy, string name> : DefaultAttrsIntrinsic<
@@ -821,7 +819,7 @@ class AMDGPUCvtScaleSRF32ToBF16F16TiedInputIntrinsic<LLVMType DstTy, string name
    llvm_float_ty, // src0
    llvm_i32_ty,   // seed
    llvm_i1_ty],   // dst_lo_hi_sel[true false]
-  [IntrNoMem, ImmArg<ArgIndex<3>>]
+  [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison, ImmArg<ArgIndex<3>>]
 >, ClangBuiltin<"__builtin_amdgcn_"#name>;
 
 def int_amdgcn_cvt_sr_bf16_f32: AMDGPUCvtScaleSRF32ToBF16F16TiedInputIntrinsic<llvm_v2bf16_ty, "cvt_sr_bf16_f32">;
@@ -890,13 +888,13 @@ def int_amdgcn_cvt_scalef32_sr_fp8_f16: AMDGPUCvtScaleBF16F16F32SRToFP4BF8F8Tied
 def int_amdgcn_cvt_scalef32_sr_fp8_f32: AMDGPUCvtScaleBF16F16F32SRToFP4BF8F8TiedInputIntrinsic<llvm_float_ty, "cvt_scalef32_sr_fp8_f32">;
 
 def int_amdgcn_prng_b32 : DefaultAttrsIntrinsic<
-  [llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]
+  [llvm_i32_ty], [llvm_i32_ty], [IntrNoMem, IntrNoCreateUndefOrPoison]
 >, ClangBuiltin<"__builtin_amdgcn_prng_b32">;
 
 def int_amdgcn_bitop3 :
   DefaultAttrsIntrinsic<[llvm_anyint_ty],
                         [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
-                        [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<3>>]>;
+                        [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison, ImmArg<ArgIndex<3>>]>;
 
 } // TargetPrefix = "amdgcn"
 
@@ -1435,7 +1...
[truncated]

arsenm

This probably should go in DefaultAttrsIntrinsic. With the exception of the intrinsics with the explicit flags for the poison generating behavior, this applies to nearly everything.

krzysz00 · 2025-11-04T21:56:34Z

Re general applicability ... I'm not so sure about that?

A load, for example, can return a poison, no?

arsenm · 2025-11-04T22:03:40Z

Re general applicability ... I'm not so sure about that?

A load, for example, can return a poison, no?

Doesn't matter, intrinsics have to explicitly opt-in to use the default attributes. This is a majority vote question among existing intrinsics.

krzysz00 requested review from arsenm, jayfoad and skc7 November 4, 2025 21:41

llvmbot added backend:AMDGPU llvm:ir labels Nov 4, 2025

arsenm reviewed Nov 4, 2025

View reviewed changes

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[AMDGPU] Add `nocreateundeforpoison` annotations #166450

[AMDGPU] Add `nocreateundeforpoison` annotations #166450

krzysz00 commented Nov 4, 2025 •

edited by shiltian

Loading

Uh oh!

llvmbot commented Nov 4, 2025 •

edited

Loading

Uh oh!

arsenm left a comment

Uh oh!

krzysz00 commented Nov 4, 2025

Uh oh!

arsenm commented Nov 4, 2025

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

3 participants

[AMDGPU] Add nocreateundeforpoison annotations #166450

Are you sure you want to change the base?

[AMDGPU] Add nocreateundeforpoison annotations #166450

Conversation

krzysz00 commented Nov 4, 2025 • edited by shiltian Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Nov 4, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

arsenm left a comment

Choose a reason for hiding this comment

Uh oh!

krzysz00 commented Nov 4, 2025

Uh oh!

arsenm commented Nov 4, 2025

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

3 participants

[AMDGPU] Add `nocreateundeforpoison` annotations #166450

[AMDGPU] Add `nocreateundeforpoison` annotations #166450

krzysz00 commented Nov 4, 2025 •

edited by shiltian

Loading

llvmbot commented Nov 4, 2025 •

edited

Loading