diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 8e35109061792..55a1272d58cb9 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -398,32 +398,31 @@ def int_amdgcn_div_scale : DefaultAttrsIntrinsic< // (0 = Denominator, 1 = Numerator). [llvm_anyfloat_ty, llvm_i1_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty], - [IntrNoMem, IntrSpeculatable, ImmArg>] + [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison, ImmArg>] >; +// Floating-point arithmetic intrinsics (and integer conversions) not handled +// elsewhere. +let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison] in { + def int_amdgcn_div_fmas : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], - [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty], - [IntrNoMem, IntrSpeculatable] + [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty] >; def int_amdgcn_div_fixup : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], - [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable] ->; + [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>; // Look Up 2.0 / pi src0 with segment select src1[4:0] def int_amdgcn_trig_preop : DefaultAttrsIntrinsic< - [llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable] + [llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty] >; def int_amdgcn_sin : DefaultAttrsIntrinsic< - [llvm_anyfloat_ty], [LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable] + [llvm_anyfloat_ty], [LLVMMatchType<0>] >; def int_amdgcn_cos : DefaultAttrsIntrinsic< - [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] + [llvm_anyfloat_ty], [LLVMMatchType<0>] >; // v_log_{f16|f32}, performs log2. f32 version does not handle @@ -431,7 +430,7 @@ def int_amdgcn_cos : DefaultAttrsIntrinsic< // support denormals, and the generic log2 intrinsic should be // preferred. def int_amdgcn_log : DefaultAttrsIntrinsic< - [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] + [llvm_anyfloat_ty], [LLVMMatchType<0>] >; // v_exp_{f16|f32} (int_amdgcn_exp was taken by export @@ -440,145 +439,162 @@ def int_amdgcn_log : DefaultAttrsIntrinsic< // support denormals, and the generic exp2 intrinsic should be // preferred. def int_amdgcn_exp2 : DefaultAttrsIntrinsic< - [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] + [llvm_anyfloat_ty], [LLVMMatchType<0>] >; def int_amdgcn_log_clamp : DefaultAttrsIntrinsic< - [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] ->; - -def int_amdgcn_fmul_legacy : ClangBuiltin<"__builtin_amdgcn_fmul_legacy">, - DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, IntrSpeculatable, Commutative] ->; - -// Fused single-precision multiply-add with legacy behaviour for the multiply, -// which is that +/- 0.0 * anything (even NaN or infinity) is +0.0. This is -// intended for use on subtargets that have the v_fma_legacy_f32 and/or -// v_fmac_legacy_f32 instructions. (Note that v_fma_legacy_f16 is unrelated and -// has a completely different kind of legacy behaviour.) -def int_amdgcn_fma_legacy : - DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], - [IntrNoMem, IntrSpeculatable, Commutative] + [llvm_anyfloat_ty], [LLVMMatchType<0>] >; def int_amdgcn_rcp : DefaultAttrsIntrinsic< - [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] + [llvm_anyfloat_ty], [LLVMMatchType<0>] >; def int_amdgcn_rcp_legacy : ClangBuiltin<"__builtin_amdgcn_rcp_legacy">, - DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], - [IntrNoMem, IntrSpeculatable] + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty] >; def int_amdgcn_sqrt : DefaultAttrsIntrinsic< - [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] + [llvm_anyfloat_ty], [LLVMMatchType<0>] >; def int_amdgcn_rsq : DefaultAttrsIntrinsic< - [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] + [llvm_anyfloat_ty], [LLVMMatchType<0>] >; def int_amdgcn_rsq_legacy : ClangBuiltin<"__builtin_amdgcn_rsq_legacy">, DefaultAttrsIntrinsic< - [llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable] + [llvm_float_ty], [llvm_float_ty] >; // out = 1.0 / sqrt(a) result clamped to +/- max_float. def int_amdgcn_rsq_clamp : DefaultAttrsIntrinsic< - [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]>; + [llvm_anyfloat_ty], [LLVMMatchType<0>] +>; def int_amdgcn_frexp_mant : DefaultAttrsIntrinsic< - [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] + [llvm_anyfloat_ty], [LLVMMatchType<0>] >; def int_amdgcn_frexp_exp : DefaultAttrsIntrinsic< - [llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem, IntrSpeculatable] + [llvm_anyint_ty], [llvm_anyfloat_ty] >; // v_fract is buggy on SI/CI. It mishandles infinities, may return 1.0 // and always uses rtz, so is not suitable for implementing the OpenCL // fract function. It should be ok on VI. def int_amdgcn_fract : DefaultAttrsIntrinsic< - [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] + [llvm_anyfloat_ty], [LLVMMatchType<0>] >; def int_amdgcn_cvt_pkrtz : ClangBuiltin<"__builtin_amdgcn_cvt_pkrtz">, - DefaultAttrsIntrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, IntrSpeculatable] + DefaultAttrsIntrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty] >; def int_amdgcn_cvt_pknorm_i16 : ClangBuiltin<"__builtin_amdgcn_cvt_pknorm_i16">, - DefaultAttrsIntrinsic<[llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, IntrSpeculatable] + DefaultAttrsIntrinsic<[llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty] >; def int_amdgcn_cvt_pknorm_u16 : ClangBuiltin<"__builtin_amdgcn_cvt_pknorm_u16">, - DefaultAttrsIntrinsic<[llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, IntrSpeculatable] + DefaultAttrsIntrinsic<[llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty] >; def int_amdgcn_cvt_pk_i16 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_i16">, DefaultAttrsIntrinsic< - [llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable] + [llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty] >; def int_amdgcn_cvt_pk_u16 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_u16">, - DefaultAttrsIntrinsic<[llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable] + DefaultAttrsIntrinsic<[llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty] >; def int_amdgcn_class : DefaultAttrsIntrinsic< - [llvm_i1_ty], [llvm_anyfloat_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable] + [llvm_i1_ty], [llvm_anyfloat_ty, llvm_i32_ty] >; def int_amdgcn_fmed3 : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], - [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable] + [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>] >; def int_amdgcn_cubeid : ClangBuiltin<"__builtin_amdgcn_cubeid">, DefaultAttrsIntrinsic<[llvm_float_ty], - [llvm_float_ty, llvm_float_ty, llvm_float_ty], - [IntrNoMem, IntrSpeculatable] + [llvm_float_ty, llvm_float_ty, llvm_float_ty] >; def int_amdgcn_cubema : ClangBuiltin<"__builtin_amdgcn_cubema">, DefaultAttrsIntrinsic<[llvm_float_ty], - [llvm_float_ty, llvm_float_ty, llvm_float_ty], - [IntrNoMem, IntrSpeculatable] + [llvm_float_ty, llvm_float_ty, llvm_float_ty] >; def int_amdgcn_cubesc : ClangBuiltin<"__builtin_amdgcn_cubesc">, DefaultAttrsIntrinsic<[llvm_float_ty], - [llvm_float_ty, llvm_float_ty, llvm_float_ty], - [IntrNoMem, IntrSpeculatable] + [llvm_float_ty, llvm_float_ty, llvm_float_ty] >; def int_amdgcn_cubetc : ClangBuiltin<"__builtin_amdgcn_cubetc">, DefaultAttrsIntrinsic<[llvm_float_ty], - [llvm_float_ty, llvm_float_ty, llvm_float_ty], - [IntrNoMem, IntrSpeculatable] + [llvm_float_ty, llvm_float_ty, llvm_float_ty] >; // v_ffbh_i32, as opposed to v_ffbh_u32. For v_ffbh_u32, llvm.ctlz // should be used. def int_amdgcn_sffbh : - DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable] + DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>] >; // v_mad_f32|f16/v_mac_f32|f16, selected regardless of denorm support. def int_amdgcn_fmad_ftz : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], - [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable] + [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>] +>; + +def int_amdgcn_tanh : DefaultAttrsIntrinsic< + [llvm_anyfloat_ty], [LLVMMatchType<0>] +>; + +def int_amdgcn_cvt_sr_pk_f16_f32 : DefaultAttrsIntrinsic< + [llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty, llvm_i32_ty] +>, ClangBuiltin<"__builtin_amdgcn_cvt_sr_pk_f16_f32">; + +def int_amdgcn_cvt_sr_pk_bf16_f32 : DefaultAttrsIntrinsic< + [llvm_v2bf16_ty], [llvm_float_ty, llvm_float_ty, llvm_i32_ty] +>, ClangBuiltin<"__builtin_amdgcn_cvt_sr_pk_bf16_f32">; + +def int_amdgcn_cvt_pk_f16_fp8 : DefaultAttrsIntrinsic< + [llvm_v2f16_ty], [llvm_i16_ty] +>, ClangBuiltin<"__builtin_amdgcn_cvt_pk_f16_fp8">; + +def int_amdgcn_cvt_pk_f16_bf8 : DefaultAttrsIntrinsic< + [llvm_v2f16_ty], [llvm_i16_ty] +>, ClangBuiltin<"__builtin_amdgcn_cvt_pk_f16_bf8">; + +def int_amdgcn_cvt_pk_fp8_f16 + : DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_v2f16_ty]>, + ClangBuiltin<"__builtin_amdgcn_cvt_pk_fp8_f16">; + +def int_amdgcn_cvt_pk_bf8_f16 + : DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_v2f16_ty]>, + ClangBuiltin<"__builtin_amdgcn_cvt_pk_bf8_f16">; + +} // end let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison] + +def int_amdgcn_fmul_legacy : ClangBuiltin<"__builtin_amdgcn_fmul_legacy">, + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable, Commutative, IntrNoCreateUndefOrPoison] +>; + +// Fused single-precision multiply-add with legacy behaviour for the multiply, +// which is that +/- 0.0 * anything (even NaN or infinity) is +0.0. This is +// intended for use on subtargets that have the v_fma_legacy_f32 and/or +// v_fmac_legacy_f32 instructions. (Note that v_fma_legacy_f16 is unrelated and +// has a completely different kind of legacy behaviour.) +def int_amdgcn_fma_legacy : + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable, Commutative, IntrNoCreateUndefOrPoison] >; // FIXME: The m0 argument should be moved after the normal arguments @@ -619,61 +635,37 @@ def int_amdgcn_ds_ordered_swap : AMDGPUDSOrderedIntrinsic; def int_amdgcn_ds_append : AMDGPUDSAppendConsumedIntrinsic; def int_amdgcn_ds_consume : AMDGPUDSAppendConsumedIntrinsic; -def int_amdgcn_tanh : DefaultAttrsIntrinsic< - [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] ->; - -def int_amdgcn_cvt_sr_pk_f16_f32 : DefaultAttrsIntrinsic< - [llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable] ->, ClangBuiltin<"__builtin_amdgcn_cvt_sr_pk_f16_f32">; - -def int_amdgcn_cvt_sr_pk_bf16_f32 : DefaultAttrsIntrinsic< - [llvm_v2bf16_ty], [llvm_float_ty, llvm_float_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable] ->, ClangBuiltin<"__builtin_amdgcn_cvt_sr_pk_bf16_f32">; - -def int_amdgcn_cvt_pk_f16_fp8 : DefaultAttrsIntrinsic< - [llvm_v2f16_ty], [llvm_i16_ty], [IntrNoMem, IntrSpeculatable] ->, ClangBuiltin<"__builtin_amdgcn_cvt_pk_f16_fp8">; - -def int_amdgcn_cvt_pk_f16_bf8 : DefaultAttrsIntrinsic< - [llvm_v2f16_ty], [llvm_i16_ty], [IntrNoMem, IntrSpeculatable] ->, ClangBuiltin<"__builtin_amdgcn_cvt_pk_f16_bf8">; - -def int_amdgcn_cvt_pk_fp8_f16 - : DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_v2f16_ty], - [IntrNoMem, IntrSpeculatable]>, - ClangBuiltin<"__builtin_amdgcn_cvt_pk_fp8_f16">; - -def int_amdgcn_cvt_pk_bf8_f16 - : DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_v2f16_ty], - [IntrNoMem, IntrSpeculatable]>, - ClangBuiltin<"__builtin_amdgcn_cvt_pk_bf8_f16">; - // llvm.amdgcn.cvt.sr.fp8.f16 i32 vdst, half src, i32 seed, i32 old, imm byte_sel [0..3] // byte_sel selects byte to write in vdst. def int_amdgcn_cvt_sr_fp8_f16 : DefaultAttrsIntrinsic< [llvm_i32_ty], [llvm_half_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, ImmArg>] + [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison, + ImmArg>, Range, 0, 4>] >, ClangBuiltin<"__builtin_amdgcn_cvt_sr_fp8_f16">; // llvm.amdgcn.cvt.sr.bf8.f16 i32 vdst, half src, i32 seed, i32 old, imm byte_sel [0..3] // byte_sel selects byte to write in vdst. def int_amdgcn_cvt_sr_bf8_f16 : DefaultAttrsIntrinsic< [llvm_i32_ty], [llvm_half_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, ImmArg>] + [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison, + ImmArg>, Range, 0, 4>] >, ClangBuiltin<"__builtin_amdgcn_cvt_sr_bf8_f16">; // llvm.amdgcn.cvt.scale.pk32.f16.bf6 v32f16 vdst, v6i32 src0, i32 scale_sel [0..15] class AMDGPUCvtScaleIntrinsic : DefaultAttrsIntrinsic< - [DstTy], [Src0Ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable, ImmArg>] + [DstTy], [Src0Ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison, + ImmArg>, Range, 0, 16>] >, ClangBuiltin<"__builtin_amdgcn_"#name>; class AMDGPUCvtScaleF32Intrinsic : DefaultAttrsIntrinsic< - [DstTy], [Src0Ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable] + [DstTy], [Src0Ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison] >, ClangBuiltin<"__builtin_amdgcn_"#name>; class AMDGPUCvtScaleF32SRIntrinsic : DefaultAttrsIntrinsic< - [DstTy], [Src0Ty, llvm_i32_ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable] + [DstTy], [Src0Ty, llvm_i32_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison] >, ClangBuiltin<"__builtin_amdgcn_"#name>; def int_amdgcn_cvt_scale_pk8_f16_fp8 : AMDGPUCvtScaleIntrinsic; @@ -693,7 +685,8 @@ def int_amdgcn_cvt_scale_pk16_f32_fp6 : AMDGPUCvtScaleIntrinsic; class AMDGPUCvtScaleF32ToFP6BF6Intrinsic : DefaultAttrsIntrinsic< - [DstTy], [Src0Ty, Src1Ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable] + [DstTy], [Src0Ty, Src1Ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison] >, ClangBuiltin<"__builtin_amdgcn_"#name>; def int_amdgcn_cvt_scalef32_pk32_fp6_f16 : AMDGPUCvtScaleF32Intrinsic; @@ -746,7 +739,8 @@ class AMDGPUCvtScaleFP4FP8BF8ToF1632Intrinsic : Def [llvm_i32_ty, // src llvm_float_ty, // scale llvm_i32_ty], // src_sel index [0..3] - [IntrNoMem, ImmArg>] + [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison, + ImmArg>, Range, 0, 4>] >, ClangBuiltin<"__builtin_amdgcn_"#name>; class AMDGPUCvtScale_pk_FromFP8BF8Intrinsic : DefaultAttrsIntrinsic< @@ -754,7 +748,7 @@ class AMDGPUCvtScale_pk_FromFP8BF8Intrinsic : Defau [llvm_i32_ty, // src llvm_float_ty, // scale llvm_i1_ty], // src_lo_hi_sel[true false] - [IntrNoMem, IntrSpeculatable, ImmArg>] + [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison, ImmArg>] >, ClangBuiltin<"__builtin_amdgcn_"#name>; class AMDGPUCvtScaleF16BF16ToFP8BF8TiedInputIntrinsic : DefaultAttrsIntrinsic< @@ -763,7 +757,7 @@ class AMDGPUCvtScaleF16BF16ToFP8BF8TiedInputIntrinsic>] + [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison, ImmArg>] >, ClangBuiltin<"__builtin_amdgcn_"#name>; class AMDGPUCvtScaleF32ToFP8BF8TiedInputIntrinsic : DefaultAttrsIntrinsic< @@ -773,7 +767,7 @@ class AMDGPUCvtScaleF32ToFP8BF8TiedInputIntrinsic : DefaultAttrsInt llvm_float_ty, // src1 llvm_float_ty, // scale llvm_i1_ty], // dst_lo_hi_sel[true false] - [IntrNoMem, ImmArg>] + [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison, ImmArg>] >, ClangBuiltin<"__builtin_amdgcn_"#name>; class AMDGPUCvtScaleFP8BF8ToF16TiedInputIntrinsic : DefaultAttrsIntrinsic< @@ -783,7 +777,8 @@ class AMDGPUCvtScaleFP8BF8ToF16TiedInputIntrinsic : llvm_float_ty, // scale llvm_i32_ty, // src_sel_index[0..3] llvm_i1_ty], // dst_lo_hi_sel[true false] - [IntrNoMem, ImmArg>, ImmArg>] + [IntrNoMem, IntrNoCreateUndefOrPoison, + ImmArg>, Range, 0, 4>, ImmArg>] >, ClangBuiltin<"__builtin_amdgcn_"#name>; class AMDGPUCvtScaleF32ToFP4Intrinsic : DefaultAttrsIntrinsic< @@ -793,7 +788,8 @@ class AMDGPUCvtScaleF32ToFP4Intrinsic : DefaultAttrsIntrinsic< llvm_float_ty, // src1 llvm_float_ty, // scale llvm_i32_ty], // dst_sel_index[0..3] - [IntrNoMem, ImmArg>] + [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison, + ImmArg>, Range, 0, 4>] >, ClangBuiltin<"__builtin_amdgcn_"#name>; class AMDGPUCvtScaleF16ToFP4TiedInputIntrinsic : DefaultAttrsIntrinsic< @@ -802,7 +798,8 @@ class AMDGPUCvtScaleF16ToFP4TiedInputIntrinsic : De SrcTy, // src llvm_float_ty, // scale llvm_i32_ty], // dest_sel_index [0..3] - [IntrNoMem, ImmArg>] + [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison, + ImmArg>, Range, 0, 4>] >, ClangBuiltin<"__builtin_amdgcn_"#name>; class AMDGPUCvtScaleBF16F16F32SRToFP4BF8F8TiedInputIntrinsic : DefaultAttrsIntrinsic< @@ -812,7 +809,8 @@ class AMDGPUCvtScaleBF16F16F32SRToFP4BF8F8TiedInputIntrinsic>] + [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison, + ImmArg>, Range, 0, 4>] >, ClangBuiltin<"__builtin_amdgcn_"#name>; class AMDGPUCvtScaleSRF32ToBF16F16TiedInputIntrinsic : DefaultAttrsIntrinsic< @@ -821,7 +819,7 @@ class AMDGPUCvtScaleSRF32ToBF16F16TiedInputIntrinsic>] + [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison, ImmArg>] >, ClangBuiltin<"__builtin_amdgcn_"#name>; def int_amdgcn_cvt_sr_bf16_f32: AMDGPUCvtScaleSRF32ToBF16F16TiedInputIntrinsic; @@ -890,13 +888,13 @@ def int_amdgcn_cvt_scalef32_sr_fp8_f16: AMDGPUCvtScaleBF16F16F32SRToFP4BF8F8Tied def int_amdgcn_cvt_scalef32_sr_fp8_f32: AMDGPUCvtScaleBF16F16F32SRToFP4BF8F8TiedInputIntrinsic; def int_amdgcn_prng_b32 : DefaultAttrsIntrinsic< - [llvm_i32_ty], [llvm_i32_ty], [IntrNoMem] + [llvm_i32_ty], [llvm_i32_ty], [IntrNoMem, IntrNoCreateUndefOrPoison] >, ClangBuiltin<"__builtin_amdgcn_prng_b32">; def int_amdgcn_bitop3 : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, ImmArg>]>; + [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison, ImmArg>]>; } // TargetPrefix = "amdgcn" @@ -1435,7 +1433,7 @@ def int_amdgcn_make_buffer_rsrc : DefaultAttrsIntrinsic < llvm_i32_ty], // flags // Attributes lifted from ptrmask + some extra argument attributes. [IntrNoMem, ReadNone>, - IntrSpeculatable]>; + IntrSpeculatable, IntrNoCreateUndefOrPoison]>; defset list AMDGPUBufferIntrinsics = { @@ -2352,12 +2350,12 @@ def int_amdgcn_live_mask : DefaultAttrsIntrinsic <[llvm_i1_ty], def int_amdgcn_mbcnt_lo : ClangBuiltin<"__builtin_amdgcn_mbcnt_lo">, DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem]>; + [IntrNoMem, IntrNoCreateUndefOrPoison]>; def int_amdgcn_mbcnt_hi : ClangBuiltin<"__builtin_amdgcn_mbcnt_hi">, DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem]>; + [IntrNoMem, IntrNoCreateUndefOrPoison]>; // llvm.amdgcn.ds.swizzle src offset def int_amdgcn_ds_swizzle : @@ -2379,55 +2377,55 @@ def int_amdgcn_sbfe : DefaultAttrsIntrinsic<[llvm_anyint_ty], def int_amdgcn_lerp : ClangBuiltin<"__builtin_amdgcn_lerp">, DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable] + [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison] >; def int_amdgcn_sad_u8 : ClangBuiltin<"__builtin_amdgcn_sad_u8">, DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable] + [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison] >; def int_amdgcn_msad_u8 : ClangBuiltin<"__builtin_amdgcn_msad_u8">, DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable] + [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison] >; def int_amdgcn_sad_hi_u8 : ClangBuiltin<"__builtin_amdgcn_sad_hi_u8">, DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable] + [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison] >; def int_amdgcn_sad_u16 : ClangBuiltin<"__builtin_amdgcn_sad_u16">, DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable] + [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison] >; def int_amdgcn_qsad_pk_u16_u8 : ClangBuiltin<"__builtin_amdgcn_qsad_pk_u16_u8">, DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], - [IntrNoMem, IntrSpeculatable] + [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison] >; def int_amdgcn_mqsad_pk_u16_u8 : ClangBuiltin<"__builtin_amdgcn_mqsad_pk_u16_u8">, DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], - [IntrNoMem, IntrSpeculatable] + [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison] >; def int_amdgcn_mqsad_u32_u8 : ClangBuiltin<"__builtin_amdgcn_mqsad_u32_u8">, DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_i64_ty, llvm_i32_ty, llvm_v4i32_ty], - [IntrNoMem, IntrSpeculatable] + [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison] >; def int_amdgcn_cvt_pk_u8_f32 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_u8_f32">, DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable] + [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison] >; def int_amdgcn_icmp : @@ -2444,7 +2442,7 @@ def int_amdgcn_fcmp : // in all active lanes, and zero in all inactive lanes. def int_amdgcn_ballot : Intrinsic<[llvm_anyint_ty], [llvm_i1_ty], - [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>; + [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree, IntrNoCreateUndefOrPoison]>; // Inverse of ballot: return the bit corresponding to the current lane from the // given mask. @@ -2452,7 +2450,7 @@ def int_amdgcn_ballot : // This is only defined for dynamically uniform masks and therefore convergent. def int_amdgcn_inverse_ballot : Intrinsic<[llvm_i1_ty], [llvm_anyint_ty], - [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>; + [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree, IntrNoCreateUndefOrPoison]>; // Lowers to S_BITREPLICATE_B64_B32. // The argument must be uniform; otherwise, the result is undefined. @@ -2491,7 +2489,7 @@ defm int_amdgcn_wave_reduce_ : AMDGPUWaveReduceOps; def int_amdgcn_readfirstlane : Intrinsic<[llvm_any_ty], [LLVMMatchType<0>], - [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>; + [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree, IntrNoCreateUndefOrPoison]>; // The lane argument must be uniform across the currently active threads of the // current wave. Otherwise, the result is undefined. @@ -2520,22 +2518,22 @@ def int_amdgcn_alignbyte : ClangBuiltin<"__builtin_amdgcn_alignbyte">, // When returning i64, they're lowered to a mul24/mulhi24 pair. def int_amdgcn_mul_i24 : DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable] + [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison] >; def int_amdgcn_mul_u24 : DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable] + [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison] >; def int_amdgcn_mulhi_i24 : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable] + [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison] >; def int_amdgcn_mulhi_u24 : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable] + [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison] >; // llvm.amdgcn.ds.gws.init(i32 bar_val, i32 resource_id) @@ -2675,13 +2673,13 @@ def int_amdgcn_set_inactive_chain_arg : // Return if the given flat pointer points to a local memory address. def int_amdgcn_is_shared : ClangBuiltin<"__builtin_amdgcn_is_shared">, DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty], - [IntrNoMem, IntrSpeculatable] // FIXME: This should be captures(ret: address) + [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison] // FIXME: This should be captures(ret: address) >; // Return if the given flat pointer points to a prvate memory address. def int_amdgcn_is_private : ClangBuiltin<"__builtin_amdgcn_is_private">, DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty], - [IntrNoMem, IntrSpeculatable] // FIXME: This should be captures(ret: address) + [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison] // FIXME: This should be captures(ret: address) >; // A uniform tail call to a function with the `amdgpu_cs_chain` or @@ -2948,7 +2946,8 @@ class AMDGPUWmmaIntrinsic : LLVMMatchType<1>, // %B LLVMMatchType<0>, // %C ], - [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree] + [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree, + IntrNoCreateUndefOrPoison] >; class AMDGPUWmmaIntrinsicOPSEL : @@ -2960,7 +2959,8 @@ class AMDGPUWmmaIntrinsicOPSEL : LLVMMatchType<0>, // %C llvm_i1_ty, // %high (op_sel) for GFX11, 0 for GFX12 ], - [IntrNoMem, IntrConvergent, ImmArg>, IntrWillReturn, IntrNoCallback, IntrNoFree] + [IntrNoMem, IntrConvergent, ImmArg>, IntrWillReturn, IntrNoCallback, IntrNoFree, + IntrNoCreateUndefOrPoison] >; class AMDGPUWmmaIntrinsicIU : @@ -2974,7 +2974,8 @@ class AMDGPUWmmaIntrinsicIU : LLVMMatchType<0>, // %C llvm_i1_ty, // %clamp ], - [IntrNoMem, IntrConvergent, ImmArg>, ImmArg>, ImmArg>, IntrWillReturn, IntrNoCallback, IntrNoFree] + [IntrNoMem, IntrConvergent, ImmArg>, ImmArg>, ImmArg>, IntrWillReturn, IntrNoCallback, IntrNoFree, + IntrNoCreateUndefOrPoison] >; // WMMA GFX11Only @@ -3079,7 +3080,7 @@ class AMDGPUSWmmacIntrinsicIdx, // %C Index // %Sparsity index for A ], - [IntrNoMem, IntrConvergent, IntrWillReturn] + [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCreateUndefOrPoison] >; class AMDGPUSWmmacIntrinsicIUIdx : @@ -3094,7 +3095,7 @@ class AMDGPUSWmmacIntrinsicIUIdx>, ImmArg>, ImmArg>] + [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCreateUndefOrPoison, ImmArg>, ImmArg>, ImmArg>] >; defset list AMDGPUWMMAIntrinsicsGFX12 = { @@ -3214,7 +3215,7 @@ def int_amdgcn_fdot2 : llvm_float_ty, // %c llvm_i1_ty // %clamp ], - [IntrNoMem, IntrSpeculatable, ImmArg>] + [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison, ImmArg>] >; // f16 %r = llvm.amdgcn.fdot2.f16.f16(v2f16 %a, v2f16 %b, f16 %c) @@ -3228,7 +3229,7 @@ def int_amdgcn_fdot2_f16_f16 : llvm_v2f16_ty, // %b llvm_half_ty // %c ], - [IntrNoMem, IntrSpeculatable] + [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison] >; // bf16 %r = llvm.amdgcn.fdot2.bf16.bf16(v2bf16 %a, v2bf16 %b, bf16 %c) @@ -3242,7 +3243,7 @@ def int_amdgcn_fdot2_bf16_bf16 : llvm_v2bf16_ty, // %b llvm_bfloat_ty // %c ], - [IntrNoMem, IntrSpeculatable] + [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison] >; // f32 %r = llvm.amdgcn.fdot2.f32.bf16(v2bf16 %a, v2bf16 %b, f32 %c, i1 %clamp) @@ -3257,7 +3258,7 @@ def int_amdgcn_fdot2_f32_bf16 : llvm_float_ty, // %c llvm_i1_ty // %clamp ], - [IntrNoMem, IntrSpeculatable, ImmArg>] + [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison, ImmArg>] >; // f32 %r = llvm.amdgcn.fdot2c.f32.bf16(v2bf16 %a, v2bf16 %b, f32 %c, i1 %clamp) @@ -3275,7 +3276,7 @@ def int_amdgcn_fdot2c_f32_bf16 : llvm_float_ty, // %c llvm_i1_ty // %clamp ], - [IntrNoMem, IntrSpeculatable, ImmArg>] + [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison, ImmArg>] >; // i32 %r = llvm.amdgcn.sdot2(v2i16 %a, v2i16 %b, i32 %c, i1 %clamp) @@ -3290,7 +3291,7 @@ def int_amdgcn_sdot2 : llvm_i32_ty, // %c llvm_i1_ty // %clamp ], - [IntrNoMem, IntrSpeculatable, ImmArg>] + [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison, ImmArg>] >; // u32 %r = llvm.amdgcn.udot2(v2u16 %a, v2u16 %b, u32 %c, i1 %clamp) @@ -3305,7 +3306,7 @@ def int_amdgcn_udot2 : llvm_i32_ty, // %c llvm_i1_ty // %clamp ], - [IntrNoMem, IntrSpeculatable, ImmArg>] + [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison, ImmArg>] >; // i32 %r = llvm.amdgcn.sdot4(v4i8 (as i32) %a, v4i8 (as i32) %b, i32 %c, i1 %clamp) @@ -3320,7 +3321,7 @@ def int_amdgcn_sdot4 : llvm_i32_ty, // %c llvm_i1_ty // %clamp ], - [IntrNoMem, IntrSpeculatable, ImmArg>] + [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison, ImmArg>] >; // u32 %r = llvm.amdgcn.udot4(v4u8 (as u32) %a, v4u8 (as u32) %b, u32 %c, i1 %clamp) @@ -3335,7 +3336,7 @@ def int_amdgcn_udot4 : llvm_i32_ty, // %c llvm_i1_ty // %clamp ], - [IntrNoMem, IntrSpeculatable, ImmArg>] + [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison, ImmArg>] >; // i32 %r = llvm.amdgcn.sudot4(i1 %a_sign, v4i8 (as i32) %a, i1 %b_sign, v4i8 (as i32) %b, i32 %c, i1 %clamp) @@ -3355,7 +3356,7 @@ def int_amdgcn_sudot4 : llvm_i32_ty, // %c llvm_i1_ty // %clamp ], - [IntrNoMem, IntrSpeculatable, + [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison, ImmArg>, ImmArg>, ImmArg>] >; @@ -3372,7 +3373,7 @@ def int_amdgcn_sdot8 : llvm_i32_ty, // %c llvm_i1_ty // %clamp ], - [IntrNoMem, IntrSpeculatable, ImmArg>] + [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison, ImmArg>] >; // u32 %r = llvm.amdgcn.udot8(v8u4 (as u32) %a, v8u4 (as u32) %b, u32 %c, i1 %clamp) @@ -3388,7 +3389,7 @@ def int_amdgcn_udot8 : llvm_i32_ty, // %c llvm_i1_ty // %clamp ], - [IntrNoMem, IntrSpeculatable, ImmArg>] + [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison, ImmArg>] >; // i32 %r = llvm.amdgcn.sudot8(i1 %a_sign, v8i4 (as i32) %a, i1 %b_sign, v8i4 (as i32) %b, i32 %c, i1 %clamp) @@ -3409,7 +3410,7 @@ def int_amdgcn_udot8 : llvm_i32_ty, // %c llvm_i1_ty // %clamp ], - [IntrNoMem, IntrSpeculatable, + [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison, ImmArg>, ImmArg>, ImmArg>] >; @@ -3424,7 +3425,7 @@ class AMDGPU8bitFloatDot4Intrinsic : llvm_i32_ty, // %b llvm_float_ty, // %c ], - [IntrNoMem, IntrSpeculatable] + [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison] >; def int_amdgcn_dot4_f32_fp8_bf8 : AMDGPU8bitFloatDot4Intrinsic; @@ -3442,7 +3443,7 @@ class AMDGPUMfmaIntrinsic : DefaultAttrsIntrinsic<[DestTy], [SrcABTy, SrcABTy, DestTy, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrConvergent, IntrNoMem, + [IntrConvergent, IntrNoMem, IntrNoCreateUndefOrPoison, ImmArg>, ImmArg>, ImmArg>]>; @@ -3467,7 +3468,7 @@ class AMDGPUMfmaScaleIntrinsic : llvm_i32_ty, // op_sel (B matrix scale, 2-bits) // TODO: Make i2? llvm_i32_ty // v_mfma_ld_scale_b32 src1 (B matrix scale) ], - [IntrConvergent, IntrNoMem, + [IntrConvergent, IntrNoMem, IntrNoCreateUndefOrPoison, ImmArg>, ImmArg>, ImmArg>, ImmArg> ]>; @@ -3531,7 +3532,7 @@ class AMDGPUMSmfmacIntrinsic : DefaultAttrsIntrinsic<[DestTy], [SrcA, SrcB, DestTy, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrConvergent, IntrNoMem, + [IntrConvergent, IntrNoMem, IntrNoCreateUndefOrPoison, ImmArg>, ImmArg>]>; class AMDGPUMFp8SmfmacIntrinsic : @@ -3586,32 +3587,32 @@ def int_amdgcn_cvt_f32_fp8_e5m3 : ClangBuiltin<"__builtin_amdgcn_cvt_f32_fp8_e5m def int_amdgcn_cvt_pk_f32_bf8 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_f32_bf8">, DefaultAttrsIntrinsic<[llvm_v2f32_ty], [llvm_i32_ty, llvm_i1_ty], - [IntrNoMem, ImmArg>]>; + [IntrNoMem, IntrNoCreateUndefOrPoison, ImmArg>]>; // llvm.amdgcn.cvt.pk.f32.fp8 float2 vdst, int srcA, imm word_sel. def int_amdgcn_cvt_pk_f32_fp8 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_f32_fp8">, DefaultAttrsIntrinsic<[llvm_v2f32_ty], [llvm_i32_ty, llvm_i1_ty], - [IntrNoMem, ImmArg>]>; + [IntrNoMem, IntrNoCreateUndefOrPoison, ImmArg>]>; // llvm.amdgcn.cvt.pk.bf8.f32 int vdst, float srcA, float srcB, int old, imm word_sel // word_sel = 1 selects 2 high bytes in the vdst, 0 selects 2 low bytes. def int_amdgcn_cvt_pk_bf8_f32 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_bf8_f32">, DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i1_ty], - [IntrNoMem, ImmArg>]>; + [IntrNoMem, IntrNoCreateUndefOrPoison, ImmArg>]>; // llvm.amdgcn.cvt.pk.fp8.f32 int vdst, float srcA, float srcB, int old, imm word_sel def int_amdgcn_cvt_pk_fp8_f32 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_fp8_f32">, DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i1_ty], - [IntrNoMem, ImmArg>]>; + [IntrNoMem, IntrNoCreateUndefOrPoison, ImmArg>]>; // llvm.amdgcn.cvt.pk.fp8.f32.e5m3 int vdst, float srcA, float srcB, int old, imm word_sel def int_amdgcn_cvt_pk_fp8_f32_e5m3 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_fp8_f32_e5m3">, DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i1_ty], - [IntrNoMem, ImmArg>]>; + [IntrNoMem, IntrNoCreateUndefOrPoison, ImmArg>]>; // llvm.amdgcn.cvt.sr.bf8.f32 int vdst, float srcA, int srcB, int old, imm byte_sel [0..3] // byte_sel selects byte to write into vdst. @@ -3636,7 +3637,7 @@ def int_amdgcn_cvt_sr_fp8_f32_e5m3 : ClangBuiltin<"__builtin_amdgcn_cvt_sr_fp8_f def int_amdgcn_cvt_off_f32_i4: ClangBuiltin<"__builtin_amdgcn_cvt_off_f32_i4">, DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], - [IntrNoMem, IntrSpeculatable]>; + [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison]>; //===----------------------------------------------------------------------===// // gfx950 intrinsics @@ -3684,12 +3685,12 @@ def int_amdgcn_permlane32_swap : // llvm.amdgcn.ashr_pk_i8_i32 int vdst, int src0, int src1 int src2 def int_amdgcn_ashr_pk_i8_i32 : ClangBuiltin<"__builtin_amdgcn_ashr_pk_i8_i32">, DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable]>; + [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison]>; // llvm.amdgcn.ashr_pk_u8_i32 int vdst, int src0, int src1 int src2 def int_amdgcn_ashr_pk_u8_i32 : ClangBuiltin<"__builtin_amdgcn_ashr_pk_u8_i32">, DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable]>; + [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison]>; //===----------------------------------------------------------------------===// // gfx1250 intrinsics @@ -3739,13 +3740,13 @@ def int_amdgcn_cvt_f16_fp8 : ClangBuiltin<"__builtin_amdgcn_cvt_f16_fp8">, def int_amdgcn_cvt_f16_bf8 : ClangBuiltin<"__builtin_amdgcn_cvt_f16_bf8">, DefaultAttrsIntrinsic<[llvm_half_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg>]>; + [IntrNoMem, IntrNoCreateUndefOrPoison, ImmArg>]>; def int_amdgcn_sat_pk4_i4_i8 : ClangBuiltin<"__builtin_amdgcn_sat_pk4_i4_i8">, - DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison]>; def int_amdgcn_sat_pk4_u4_u8 : ClangBuiltin<"__builtin_amdgcn_sat_pk4_u4_u8">, - DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison]>; // llvm.amdgcn.permlane.bcast def int_amdgcn_permlane_bcast : ClangBuiltin<"__builtin_amdgcn_permlane_bcast">, @@ -3791,7 +3792,7 @@ def int_amdgcn_perm_pk16_b8_u4 : ClangBuiltin<"__builtin_amdgcn_perm_pk16_b8_u4" class AMDGPUAddMinMax : ClangBuiltin<"__builtin_amdgcn_"#Name>, DefaultAttrsIntrinsic<[Ty], [Ty, Ty, Ty, llvm_i1_ty /* clamp */], - [IntrNoMem, IntrSpeculatable, ImmArg>]>; + [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison, ImmArg>]>; def int_amdgcn_add_max_i32 : AMDGPUAddMinMax; def int_amdgcn_add_max_u32 : AMDGPUAddMinMax; @@ -3877,7 +3878,7 @@ def int_amdgcn_unreachable : Intrinsic<[], [], [IntrConvergent, IntrNoCallback, // pass based on !fpmath metadata. def int_amdgcn_fdiv_fast : DefaultAttrsIntrinsic< [llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, IntrSpeculatable] + [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison] >; // Async instructions increment ASYNCcnt which is modeled as InaccessibleMem. @@ -3962,7 +3963,7 @@ class AMDGPUWmmaIntrinsicModsAB : llvm_i1_ty, // matrix_b_reuse ], [IntrNoMem, IntrConvergent, ImmArg>, ImmArg>, ImmArg>, ImmArg>, - IntrWillReturn, IntrNoCallback, IntrNoFree] + IntrWillReturn, IntrNoCallback, IntrNoFree, IntrNoCreateUndefOrPoison] >; class AMDGPUWmmaIntrinsicModsC : @@ -3977,7 +3978,7 @@ class AMDGPUWmmaIntrinsicModsC : llvm_i1_ty, // matrix_b_reuse ], [IntrNoMem, IntrConvergent, ImmArg>, ImmArg>, ImmArg>, - IntrWillReturn, IntrNoCallback, IntrNoFree] + IntrWillReturn, IntrNoCallback, IntrNoFree, IntrNoCreateUndefOrPoison] >; class AMDGPUWmmaIntrinsicF4ModsC : @@ -3989,7 +3990,9 @@ class AMDGPUWmmaIntrinsicF4ModsC : llvm_i16_ty, // %C_mod: 0 - none, 1 - neg, 2 - abs, 3 - neg(abs) LLVMMatchType<0>, // %C ], - [IntrNoMem, IntrConvergent, ImmArg>, IntrWillReturn, IntrNoCallback, IntrNoFree] + [IntrNoMem, IntrConvergent, ImmArg>, + IntrWillReturn, IntrNoCallback, IntrNoFree, IntrNoCreateUndefOrPoison + ] >; class AMDGPUWmmaIntrinsicModsAll : @@ -4076,7 +4079,7 @@ class AMDGPUWmmaScaleIntrinsicModsC : ], [IntrNoMem, IntrConvergent, ImmArg>, ImmArg>, ImmArg>, ImmArg>, ImmArg>, ImmArg>, ImmArg>, ImmArg>, ImmArg>, - IntrWillReturn, IntrNoCallback, IntrNoFree] + IntrWillReturn, IntrNoCallback, IntrNoFree, IntrNoCreateUndefOrPoison] >; class AMDGPUWmmaScaleF4IntrinsicModsC : @@ -4098,7 +4101,7 @@ class AMDGPUWmmaScaleF4IntrinsicModsC : ], [IntrNoMem, IntrConvergent, ImmArg>, ImmArg>, ImmArg>, ImmArg>, ImmArg>, ImmArg>, ImmArg>, - IntrWillReturn, IntrNoCallback, IntrNoFree] + IntrWillReturn, IntrNoCallback, IntrNoFree, IntrNoCreateUndefOrPoison] >; defset list AMDGPUWMMAIntrinsicsGFX1250 = { @@ -4146,7 +4149,8 @@ class AMDGPUSWmmacIntrinsicABIdx>, ImmArg>, ImmArg>, ImmArg>] + [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCreateUndefOrPoison, + ImmArg>, ImmArg>, ImmArg>, ImmArg>] >; defset list AMDGPUSWMMACIntrinsicsGFX1250 = { diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-min-agpr-alloc.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-min-agpr-alloc.ll index f730199e474f3..4db668e05cb21 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-min-agpr-alloc.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-min-agpr-alloc.ll @@ -1031,7 +1031,7 @@ attributes #1 = { "amdgpu-waves-per-eu"="1,1" } ; CHECK: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="1" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR2]] = { "amdgpu-agpr-alloc"="2" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR3]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" } +; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nocreateundeforpoison nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" } ; CHECK: attributes #[[ATTR5]] = { "amdgpu-agpr-alloc"="4" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR6]] = { "amdgpu-agpr-alloc"="6" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR7]] = { "amdgpu-agpr-alloc"="5" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-dynamic-indirect-access-asan.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-dynamic-indirect-access-asan.ll index 2776b9187724c..4e53df3924985 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-dynamic-indirect-access-asan.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-dynamic-indirect-access-asan.ll @@ -7,12 +7,14 @@ @lds_3 = external addrspace(3) global [0 x i8], align 4 @lds_4 = external addrspace(3) global [0 x i8], align 8 +; @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.k0], no_sanitize_address +; @llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [2 x ptr addrspace(1)]] {{\[}}[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0)]], no_sanitize_address ;. ; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol [[META0:![0-9]+]] ; CHECK: @llvm.amdgcn.k0.dynlds = external addrspace(3) global [0 x i8], no_sanitize_address, align 8, !absolute_symbol [[META1:![0-9]+]] ; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 32, i32 1, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 64, i32 4, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 96, i32 0, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 128, i32 0, i32 32 } }, no_sanitize_address -; @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.k0], no_sanitize_address -; @llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [2 x ptr addrspace(1)]] [[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0)]], no_sanitize_address +; CHECK: @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.k0], no_sanitize_address +; CHECK: @llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [2 x ptr addrspace(1)]] {{\[}}[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0)]], no_sanitize_address ;. define void @use_variables() sanitize_address { ; CHECK-LABEL: define void @use_variables( @@ -248,7 +250,7 @@ define amdgpu_kernel void @k0() sanitize_address { ; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } ; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn } -; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent nocallback nofree nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent nocallback nocreateundeforpoison nofree nounwind willreturn memory(none) } ; CHECK: attributes #[[ATTR6:[0-9]+]] = { convergent nocallback nofree nounwind } ; CHECK: attributes #[[ATTR7]] = { nomerge } ;. diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-dynamic-lds-test-asan.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-dynamic-lds-test-asan.ll index f33b30119754f..32601422c7e67 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-dynamic-lds-test-asan.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-dynamic-lds-test-asan.ll @@ -66,7 +66,7 @@ define amdgpu_kernel void @k0() sanitize_address { ; CHECK-NEXT: [[TMP38:%.*]] = and i1 [[TMP34]], [[TMP37]] ; CHECK-NEXT: [[TMP39:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP38]]) ; CHECK-NEXT: [[TMP40:%.*]] = icmp ne i64 [[TMP39]], 0 -; CHECK-NEXT: br i1 [[TMP40]], label [[ASAN_REPORT:%.*]], label [[TMP43:%.*]], !prof [[PROF2:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP40]], label [[ASAN_REPORT:%.*]], label [[TMP43:%.*]], !prof [[PROF3:![0-9]+]] ; CHECK: asan.report: ; CHECK-NEXT: br i1 [[TMP38]], label [[TMP41:%.*]], label [[CONDFREE:%.*]] ; CHECK: 41: @@ -103,11 +103,12 @@ define amdgpu_kernel void @k0() sanitize_address { ; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } ; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nounwind willreturn } -; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nocreateundeforpoison nofree nounwind willreturn memory(none) } ; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent nocallback nofree nounwind } ; CHECK: attributes #[[ATTR6]] = { nomerge } ;. ; CHECK: [[META0]] = !{i32 0, i32 1} ; CHECK: [[META1]] = !{i32 8, i32 9} -; CHECK: [[PROF2]] = !{!"branch_weights", i32 1, i32 1048575} +; CHECK: [[META2:![0-9]+]] = !{i32 4, !"nosanitize_address", i32 1} +; CHECK: [[PROF3]] = !{!"branch_weights", i32 1, i32 1048575} ;. diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-dynamic-indirect-access-asan.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-dynamic-indirect-access-asan.ll index 40b1305a3b12c..bad2d8e0fb5f4 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-dynamic-indirect-access-asan.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-dynamic-indirect-access-asan.ll @@ -8,12 +8,14 @@ @lds_3 = external addrspace(3) global [0 x i8], align 4 @lds_4 = external addrspace(3) global [0 x i8], align 8 +; @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.k0], no_sanitize_address +; @llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [2 x ptr addrspace(1)]] {{\[}}[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0)]], no_sanitize_address ;. ; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol [[META0:![0-9]+]] ; CHECK: @llvm.amdgcn.k0.dynlds = external addrspace(3) global [0 x i8], no_sanitize_address, align 8, !absolute_symbol [[META1:![0-9]+]] ; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 32, i32 1, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 64, i32 4, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 96, i32 0, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 128, i32 0, i32 32 } }, no_sanitize_address -; @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.k0], no_sanitize_address -; @llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [2 x ptr addrspace(1)]] [[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0)]], no_sanitize_address +; CHECK: @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.k0], no_sanitize_address +; CHECK: @llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [2 x ptr addrspace(1)]] {{\[}}[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0)]], no_sanitize_address ;. define void @use_variables() sanitize_address { ; CHECK-LABEL: define void @use_variables( @@ -249,7 +251,7 @@ define amdgpu_kernel void @k0() sanitize_address { ; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } ; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn } -; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent nocallback nofree nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent nocallback nocreateundeforpoison nofree nounwind willreturn memory(none) } ; CHECK: attributes #[[ATTR6:[0-9]+]] = { convergent nocallback nofree nounwind } ; CHECK: attributes #[[ATTR7]] = { nomerge } ;. diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-dynamic-lds-test-asan.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-dynamic-lds-test-asan.ll index f2cdc4c812db1..c5985e5cc4df8 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-dynamic-lds-test-asan.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-dynamic-lds-test-asan.ll @@ -88,7 +88,7 @@ define amdgpu_kernel void @k0() sanitize_address { ; CHECK-NEXT: [[TMP53:%.*]] = and i1 [[TMP49]], [[TMP52]] ; CHECK-NEXT: [[TMP54:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP53]]) ; CHECK-NEXT: [[TMP55:%.*]] = icmp ne i64 [[TMP54]], 0 -; CHECK-NEXT: br i1 [[TMP55]], label [[ASAN_REPORT:%.*]], label [[TMP58:%.*]], !prof [[PROF2:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP55]], label [[ASAN_REPORT:%.*]], label [[TMP58:%.*]], !prof [[PROF3:![0-9]+]] ; CHECK: asan.report: ; CHECK-NEXT: br i1 [[TMP53]], label [[TMP56:%.*]], label [[CONDFREE:%.*]] ; CHECK: 56: @@ -114,7 +114,7 @@ define amdgpu_kernel void @k0() sanitize_address { ; CHECK-NEXT: [[TMP71:%.*]] = and i1 [[TMP66]], [[TMP70]] ; CHECK-NEXT: [[TMP72:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP71]]) ; CHECK-NEXT: [[TMP73:%.*]] = icmp ne i64 [[TMP72]], 0 -; CHECK-NEXT: br i1 [[TMP73]], label [[ASAN_REPORT1:%.*]], label [[TMP76:%.*]], !prof [[PROF2]] +; CHECK-NEXT: br i1 [[TMP73]], label [[ASAN_REPORT1:%.*]], label [[TMP76:%.*]], !prof [[PROF3]] ; CHECK: asan.report1: ; CHECK-NEXT: br i1 [[TMP71]], label [[TMP74:%.*]], label [[TMP75:%.*]] ; CHECK: 74: @@ -139,7 +139,7 @@ define amdgpu_kernel void @k0() sanitize_address { ; CHECK-NEXT: [[TMP88:%.*]] = and i1 [[TMP84]], [[TMP87]] ; CHECK-NEXT: [[TMP89:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP88]]) ; CHECK-NEXT: [[TMP90:%.*]] = icmp ne i64 [[TMP89]], 0 -; CHECK-NEXT: br i1 [[TMP90]], label [[ASAN_REPORT2:%.*]], label [[TMP93:%.*]], !prof [[PROF2]] +; CHECK-NEXT: br i1 [[TMP90]], label [[ASAN_REPORT2:%.*]], label [[TMP93:%.*]], !prof [[PROF3]] ; CHECK: asan.report2: ; CHECK-NEXT: br i1 [[TMP88]], label [[TMP91:%.*]], label [[TMP92:%.*]] ; CHECK: 91: @@ -164,7 +164,7 @@ define amdgpu_kernel void @k0() sanitize_address { ; CHECK-NEXT: [[TMP105:%.*]] = and i1 [[TMP101]], [[TMP104]] ; CHECK-NEXT: [[TMP106:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP105]]) ; CHECK-NEXT: [[TMP107:%.*]] = icmp ne i64 [[TMP106]], 0 -; CHECK-NEXT: br i1 [[TMP107]], label [[ASAN_REPORT3:%.*]], label [[TMP110:%.*]], !prof [[PROF2]] +; CHECK-NEXT: br i1 [[TMP107]], label [[ASAN_REPORT3:%.*]], label [[TMP110:%.*]], !prof [[PROF3]] ; CHECK: asan.report3: ; CHECK-NEXT: br i1 [[TMP105]], label [[TMP108:%.*]], label [[TMP109:%.*]] ; CHECK: 108: @@ -203,11 +203,12 @@ define amdgpu_kernel void @k0() sanitize_address { ; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } ; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nounwind willreturn } -; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nocreateundeforpoison nofree nounwind willreturn memory(none) } ; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent nocallback nofree nounwind } ; CHECK: attributes #[[ATTR6]] = { nomerge } ;. ; CHECK: [[META0]] = !{i32 0, i32 1} ; CHECK: [[META1]] = !{i32 8, i32 9} -; CHECK: [[PROF2]] = !{!"branch_weights", i32 1, i32 1048575} +; CHECK: [[META2:![0-9]+]] = !{i32 4, !"nosanitize_address", i32 1} +; CHECK: [[PROF3]] = !{!"branch_weights", i32 1, i32 1048575} ;. diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-asan.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-asan.ll index b9b4c90daea87..fa888a35cb8ba 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-asan.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-asan.ll @@ -7,11 +7,13 @@ @lds_3 = external addrspace(3) global [3 x i8], align 4 @lds_4 = external addrspace(3) global [4 x i8], align 8 +; @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.k0], no_sanitize_address +; @llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [2 x ptr addrspace(1)]] {{\[}}[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0)]], no_sanitize_address ;. ; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol [[META0:![0-9]+]] ; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 32, i32 1, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 64, i32 4, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 96, i32 3, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 128, i32 4, i32 32 } }, no_sanitize_address -; @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.k0], no_sanitize_address -; @llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [2 x ptr addrspace(1)]] [[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0)]], no_sanitize_address +; CHECK: @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.k0], no_sanitize_address +; CHECK: @llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [2 x ptr addrspace(1)]] {{\[}}[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0)]], no_sanitize_address ;. define void @use_variables() sanitize_address { ; CHECK-LABEL: define void @use_variables( @@ -217,7 +219,7 @@ define amdgpu_kernel void @k0() sanitize_address { ; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } ; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn } -; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent nocallback nofree nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent nocallback nocreateundeforpoison nofree nounwind willreturn memory(none) } ; CHECK: attributes #[[ATTR6:[0-9]+]] = { convergent nocallback nofree nounwind } ; CHECK: attributes #[[ATTR7]] = { nomerge } ;. diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-function-param-asan.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-function-param-asan.ll index a70db2259cc3f..a521d9d9d436b 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-function-param-asan.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-function-param-asan.ll @@ -6,10 +6,11 @@ @lds_var = internal addrspace(3) global [1024 x i32] poison, align 4 +; @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.my_kernel], no_sanitize_address ;. ; CHECK: @llvm.amdgcn.sw.lds.my_kernel = internal addrspace(3) global ptr poison, no_sanitize_address, align 4, !absolute_symbol [[META0:![0-9]+]] ; CHECK: @llvm.amdgcn.sw.lds.my_kernel.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.my_kernel.md.type { %llvm.amdgcn.sw.lds.my_kernel.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.my_kernel.md.item { i32 32, i32 4096, i32 5120 } }, no_sanitize_address -; @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.my_kernel], no_sanitize_address +; CHECK: @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.my_kernel], no_sanitize_address ;. define void @my_function(ptr addrspace(3) %lds_arg) sanitize_address { ; CHECK-LABEL: define void @my_function( @@ -33,7 +34,7 @@ define void @my_function(ptr addrspace(3) %lds_arg) sanitize_address { ; CHECK-NEXT: [[TMP17:%.*]] = and i1 [[TMP12]], [[TMP16]] ; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP17]]) ; CHECK-NEXT: [[TMP19:%.*]] = icmp ne i64 [[TMP18]], 0 -; CHECK-NEXT: br i1 [[TMP19]], label [[ASAN_REPORT:%.*]], label [[TMP22:%.*]], !prof [[PROF1:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP19]], label [[ASAN_REPORT:%.*]], label [[TMP22:%.*]], !prof [[PROF2:![0-9]+]] ; CHECK: asan.report: ; CHECK-NEXT: br i1 [[TMP17]], label [[TMP20:%.*]], label [[TMP21:%.*]] ; CHECK: 20: @@ -60,7 +61,7 @@ define void @my_function(ptr addrspace(3) %lds_arg) sanitize_address { ; CHECK-NEXT: [[TMP36:%.*]] = and i1 [[TMP31]], [[TMP35]] ; CHECK-NEXT: [[TMP37:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP36]]) ; CHECK-NEXT: [[TMP38:%.*]] = icmp ne i64 [[TMP37]], 0 -; CHECK-NEXT: br i1 [[TMP38]], label [[ASAN_REPORT1:%.*]], label [[TMP41:%.*]], !prof [[PROF1]] +; CHECK-NEXT: br i1 [[TMP38]], label [[ASAN_REPORT1:%.*]], label [[TMP41:%.*]], !prof [[PROF2]] ; CHECK: asan.report1: ; CHECK-NEXT: br i1 [[TMP36]], label [[TMP39:%.*]], label [[TMP40:%.*]] ; CHECK: 39: @@ -81,7 +82,7 @@ define void @my_function(ptr addrspace(3) %lds_arg) sanitize_address { define amdgpu_kernel void @my_kernel() sanitize_address { ; CHECK-LABEL: define amdgpu_kernel void @my_kernel( -; CHECK-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META2:![0-9]+]] { +; CHECK-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META3:![0-9]+]] { ; CHECK-NEXT: WId: ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y() @@ -142,11 +143,12 @@ define amdgpu_kernel void @my_kernel() sanitize_address { ; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } ; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn } -; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent nocallback nofree nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent nocallback nocreateundeforpoison nofree nounwind willreturn memory(none) } ; CHECK: attributes #[[ATTR6:[0-9]+]] = { convergent nocallback nofree nounwind } ; CHECK: attributes #[[ATTR7]] = { nomerge } ;. ; CHECK: [[META0]] = !{i32 0, i32 1} -; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575} -; CHECK: [[META2]] = !{i32 0} +; CHECK: [[META1:![0-9]+]] = !{i32 4, !"nosanitize_address", i32 1} +; CHECK: [[PROF2]] = !{!"branch_weights", i32 1, i32 1048575} +; CHECK: [[META3]] = !{i32 0} ;. diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-no-heap-ptr.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-no-heap-ptr.ll index 73ffcdd783ded..ab3300ea659b8 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-no-heap-ptr.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-no-heap-ptr.ll @@ -150,7 +150,7 @@ attributes #1 = { "amdgpu-no-heap-ptr" } ; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } ; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nounwind willreturn } -; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nocreateundeforpoison nofree nounwind willreturn memory(none) } ; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent nocallback nofree nounwind } ; CHECK: attributes #[[ATTR6]] = { nomerge } ;. diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-asan.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-asan.ll index 301bda7e0086e..c7550dd9576ec 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-asan.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-asan.ll @@ -149,7 +149,7 @@ define amdgpu_kernel void @k0() sanitize_address { ; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } ; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nounwind willreturn } -; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nocreateundeforpoison nofree nounwind willreturn memory(none) } ; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent nocallback nofree nounwind } ; CHECK: attributes #[[ATTR6]] = { nomerge } ;. diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-atomic-cmpxchg-asan.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-atomic-cmpxchg-asan.ll index 02a241f947748..15b074c2d9c11 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-atomic-cmpxchg-asan.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-atomic-cmpxchg-asan.ll @@ -122,7 +122,7 @@ define amdgpu_kernel void @atomic_xchg_kernel(ptr addrspace(1) %out, [8 x i32], ; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } ; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nounwind willreturn } -; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nocreateundeforpoison nofree nounwind willreturn memory(none) } ; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent nocallback nofree nounwind } ; CHECK: attributes #[[ATTR6]] = { nomerge } ;. diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-atomicrmw-asan.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-atomicrmw-asan.ll index b87b3fd824dd3..1b3664bf1e4e7 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-atomicrmw-asan.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-atomicrmw-asan.ll @@ -204,7 +204,7 @@ define amdgpu_kernel void @atomicrmw_kernel(ptr addrspace(1) %arg0) sanitize_add ; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } ; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nounwind willreturn } -; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nocreateundeforpoison nofree nounwind willreturn memory(none) } ; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent nocallback nofree nounwind } ; CHECK: attributes #[[ATTR6]] = { nomerge } ;. diff --git a/llvm/test/Instrumentation/AddressSanitizer/asan-pass-second-run.ll b/llvm/test/Instrumentation/AddressSanitizer/asan-pass-second-run.ll index 60ef1079624d7..218a0dfa9b060 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/asan-pass-second-run.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/asan-pass-second-run.ll @@ -8,9 +8,11 @@ target triple = "x86_64-unknown-linux-gnu" ; Function with sanitize_address is instrumented. ; Function Attrs: nounwind uwtable ;. +; CHECK: @llvm.used = appending global [1 x ptr] [ptr @asan.module_ctor], section "llvm.metadata" ; CHECK: @___asan_globals_registered = common hidden global i64 0 ; CHECK: @__start_asan_globals = extern_weak hidden global i64 ; CHECK: @__stop_asan_globals = extern_weak hidden global i64 +; CHECK: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @asan.module_ctor, ptr @asan.module_ctor }] ;. define void @instr_sa(ptr %a) sanitize_address { ; CHECK: Function Attrs: sanitize_address @@ -31,7 +33,7 @@ define void @instr_sa(ptr %a) sanitize_address { ; CHECK-NEXT: [[TMP10:%.*]] = icmp sge i8 [[TMP9]], [[TMP4]] ; CHECK-NEXT: br i1 [[TMP10]], label %[[BB11:.*]], label %[[BB12]] ; CHECK: [[BB11]]: -; CHECK-NEXT: call void @__asan_report_load4(i64 [[TMP0]]) #[[ATTR2:[0-9]+]] +; CHECK-NEXT: call void @__asan_report_load4(i64 [[TMP0]]) #[[ATTR3:[0-9]+]] ; CHECK-NEXT: unreachable ; CHECK: [[BB12]]: ; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 4 @@ -47,8 +49,9 @@ entry: } ;. ; CHECK: attributes #[[ATTR0]] = { sanitize_address } -; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } -; CHECK: attributes #[[ATTR2]] = { nomerge } +; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nocreateundeforpoison nofree nosync nounwind speculatable willreturn memory(none) } +; CHECK: attributes #[[ATTR2:[0-9]+]] = { nounwind } +; CHECK: attributes #[[ATTR3]] = { nomerge } ;. ; CHECK: [[META0:![0-9]+]] = !{i32 4, !"nosanitize_address", i32 1} ; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575} diff --git a/llvm/test/tools/llvm-reduce/remove-attributes-convergent-uncontrolled.ll b/llvm/test/tools/llvm-reduce/remove-attributes-convergent-uncontrolled.ll index d408f949db824..64e1588d043ae 100644 --- a/llvm/test/tools/llvm-reduce/remove-attributes-convergent-uncontrolled.ll +++ b/llvm/test/tools/llvm-reduce/remove-attributes-convergent-uncontrolled.ll @@ -24,8 +24,8 @@ declare float @convergent.extern.func(float, float) #0 declare float @extern.func(float, float) declare float @llvm.amdgcn.readfirstlane.f32(float) #1 -; RESULT: attributes #0 = { convergent nocallback nofree nounwind willreturn memory(none) } +; RESULT: attributes #0 = { convergent nocallback nocreateundeforpoison nofree nounwind willreturn memory(none) } ; RESULT-NOT: attributes attributes #0 = { convergent nounwind } -attributes #1 = { convergent nocallback nofree nounwind willreturn memory(none) } +attributes #1 = { convergent nocallback nocreateundeforpoison nofree nounwind willreturn memory(none) }