-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[HLSL] Add load overload with status #166449
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
|
@llvm/pr-subscribers-backend-x86 @llvm/pr-subscribers-hlsl Author: Joshua Batista (bob80905). Changes: This PR adds a new load overload for resources that takes an additional by-reference parameter, status. It sets the status parameter to 1 or 0, depending on whether or not the resource access was mapped. CheckAccessFullyMapped is also added as an intrinsic and is called when producing this status bit. Patch is 20.86 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/166449.diff. 11 files affected:
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 2b400b012d6ed..f2b4c54a5ba6b 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -4934,6 +4934,12 @@ def HLSLResourceGetPointer : LangBuiltin<"HLSL_LANG"> {
let Prototype = "void(...)";
}
+// Builtin behind the status-reporting resource load. The prototype is left
+// as "void(...)"; the argument count, the argument types and the type of the
+// call expression are set by the custom check for this builtin in
+// SemaHLSL.cpp.
+def HLSLResourceLoadWithStatus : LangBuiltin<"HLSL_LANG"> {
+ let Spellings = ["__builtin_hlsl_resource_load_with_status"];
+ let Attributes = [NoThrow];
+ let Prototype = "void(...)";
+}
+
def HLSLResourceUninitializedHandle : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_resource_uninitializedhandle"];
let Attributes = [NoThrow];
diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
index fbf4a5722caed..ff3d906d4bf11 100644
--- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp
+++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
@@ -353,6 +353,64 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
RetTy, CGM.getHLSLRuntime().getCreateResourceGetPointerIntrinsic(),
ArrayRef<Value *>{HandleOp, IndexOp});
}
+ case Builtin::BI__builtin_hlsl_resource_load_with_status: {
+   // Lowers the status-reporting resource load. Argument 0 is the resource
+   // handle, argument 1 the element index, and argument 2 the caller's status
+   // variable, which is written through its address. The value extracted from
+   // the load intrinsic's result is what this case returns.
+   Value *HandleOp = EmitScalarExpr(E->getArg(0));
+   Value *IndexOp = EmitScalarExpr(E->getArg(1));
+   Address StatusAddr = EmitLValue(E->getArg(2)).getAddress();
+
+   const HLSLAttributedResourceType *RT =
+       E->getArg(0)->getType()->getAs<HLSLAttributedResourceType>();
+   assert(RT && "Expected a resource type as first parameter");
+   const bool IsRawBuffer = RT->getAttrs().RawBuffer;
+
+   // The intrinsic result is modelled as a { data, i1 status } struct.
+   llvm::Type *ResultTy = llvm::StructType::get(
+       Builder.getContext(), {ConvertType(E->getType()), Builder.getInt1Ty()});
+
+   // Raw buffers use a different intrinsic that takes one extra operand; a
+   // constant zero offset is passed for it.
+   SmallVector<Value *, 3> Args = {HandleOp, IndexOp};
+   Intrinsic::ID IntrID = llvm::Intrinsic::dx_resource_load_typedbuffer;
+   if (IsRawBuffer) {
+     IntrID = llvm::Intrinsic::dx_resource_load_rawbuffer;
+     Args.push_back(Builder.getInt32(0));
+   }
+
+   Value *Result =
+       Builder.CreateIntrinsic(ResultTy, IntrID, Args, {}, "ld.struct");
+   Value *LoadedValue = Builder.CreateExtractValue(Result, {0}, "ld.value");
+   Value *RawStatus = Builder.CreateExtractValue(Result, {1}, "ld.status");
+
+   // dx.check.access.fully.mapped takes an i32 and yields an i1, so the status
+   // bit is widened before the call.
+   // NOTE(review): the i1 returned by the load intrinsic may already be the
+   // fully-mapped bit, in which case this extra check is redundant -- confirm
+   // against the DirectX resource intrinsic documentation.
+   llvm::Type *Int32Ty = Builder.getInt32Ty();
+   Value *StatusArg = Builder.CreateZExt(RawStatus, Int32Ty, "ld.status.i32");
+   llvm::Function *CheckFn =
+       CGM.getIntrinsic(llvm::Intrinsic::dx_check_access_fully_mapped);
+   Value *Checked =
+       Builder.CreateCall(CheckFn, {StatusArg}, "ld.status.checked");
+
+   // The user's status variable is a 32-bit uint, so widen the checked bit
+   // again before storing it.
+   Builder.CreateStore(Builder.CreateZExt(Checked, Int32Ty, "ld.status.ext"),
+                       StatusAddr);
+   return LoadedValue;
+ }
case Builtin::BI__builtin_hlsl_resource_uninitializedhandle: {
llvm::Type *HandleTy = CGM.getTypes().ConvertType(E->getType());
return llvm::PoisonValue::get(HandleTy);
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h
index d35df524fdc84..f6c654bd61ff5 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.h
+++ b/clang/lib/CodeGen/CGHLSLRuntime.h
@@ -126,6 +126,9 @@ class CGHLSLRuntime {
GENERATE_HLSL_INTRINSIC_FUNCTION(CreateResourceGetPointer,
resource_getpointer)
+
+ // Registers the typed-buffer resource load intrinsic with the HLSL runtime
+ // (presumably exposed as getCreateResourceLoadTypedBufferIntrinsic(), by
+ // analogy with the resource_getpointer entry).
+ // NOTE(review): the load-with-status lowering visible in this patch names
+ // llvm::Intrinsic::dx_resource_load_typedbuffer directly instead of going
+ // through this accessor -- confirm the accessor has a user.
+ GENERATE_HLSL_INTRINSIC_FUNCTION(CreateResourceLoadTypedBuffer,
+ resource_load_typedbuffer)
GENERATE_HLSL_INTRINSIC_FUNCTION(CreateHandleFromBinding,
resource_handlefrombinding)
GENERATE_HLSL_INTRINSIC_FUNCTION(CreateHandleFromImplicitBinding,
diff --git a/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp b/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp
index 066acf6f01a90..bdbc2d4ce87b3 100644
--- a/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp
+++ b/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp
@@ -1140,6 +1140,7 @@ BuiltinTypeDeclBuilder &BuiltinTypeDeclBuilder::addLoadMethods() {
DeclarationName Load(&II);
// TODO: We also need versions with status for CheckAccessFullyMapped.
addHandleAccessFunction(Load, /*IsConst=*/false, /*IsRef=*/false);
+ addLoadWithStatusFunction(Load, /*IsConst=*/false, /*IsRef=*/false);
return *this;
}
@@ -1232,6 +1233,40 @@ BuiltinTypeDeclBuilder &BuiltinTypeDeclBuilder::addDecrementCounterMethod() {
.finalize();
}
+/// Adds a method called \p Name taking `(unsigned int Index, unsigned int
+/// &Status)` to the record being built. The method body forwards the handle
+/// and both parameters to __builtin_hlsl_resource_load_with_status and then
+/// dereferences the element pointer that call produces.
+///
+/// \param Name    name of the method to add.
+/// \param IsConst const-qualifies the result type; also forwarded to the
+///                method builder.
+/// \param IsRef   when true the result is an lvalue reference to the element
+///                type in the hlsl_device address space, otherwise the plain
+///                element type.
+BuiltinTypeDeclBuilder &
+BuiltinTypeDeclBuilder::addLoadWithStatusFunction(DeclarationName &Name,
+                                                  bool IsConst, bool IsRef) {
+  assert(!Record->isCompleteDefinition() && "record is already complete");
+  using PH = BuiltinTypeMethodBuilder::PlaceHolder;
+  ASTContext &Ctx = SemaRef.getASTContext();
+
+  QualType ElementTy = getHandleElementType();
+  QualType DeviceElementTy =
+      Ctx.getAddrSpaceQualType(ElementTy, LangAS::hlsl_device);
+
+  // Pick the base result type first, then layer const and the reference on
+  // top, so both the by-value and by-reference forms share one path.
+  QualType ResultTy = IsRef ? DeviceElementTy : ElementTy;
+  if (IsConst)
+    ResultTy.addConst();
+  if (IsRef)
+    ResultTy = Ctx.getLValueReferenceType(ResultTy);
+
+  return BuiltinTypeMethodBuilder(*this, Name, ResultTy, IsConst)
+      .addParam("Index", Ctx.UnsignedIntTy)
+      .addParam("Status", Ctx.getLValueReferenceType(Ctx.UnsignedIntTy))
+      .callBuiltin("__builtin_hlsl_resource_load_with_status",
+                   Ctx.getPointerType(DeviceElementTy), PH::Handle, PH::_0,
+                   PH::_1)
+      .dereference(PH::LastStmt)
+      .finalize();
+}
+
BuiltinTypeDeclBuilder &
BuiltinTypeDeclBuilder::addHandleAccessFunction(DeclarationName &Name,
bool IsConst, bool IsRef) {
diff --git a/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.h b/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.h
index 95e3a6c4fb2f1..4941b8b7952a2 100644
--- a/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.h
+++ b/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.h
@@ -91,6 +91,8 @@ class BuiltinTypeDeclBuilder {
BuiltinTypeDeclBuilder &addDecrementCounterMethod();
BuiltinTypeDeclBuilder &addHandleAccessFunction(DeclarationName &Name,
bool IsConst, bool IsRef);
+ /// Adds a load method named \p Name that takes an index and a by-reference
+ /// unsigned int status parameter; defined in HLSLBuiltinTypeDeclBuilder.cpp.
+ BuiltinTypeDeclBuilder &addLoadWithStatusFunction(DeclarationName &Name,
+ bool IsConst, bool IsRef);
BuiltinTypeDeclBuilder &addAppendMethod();
BuiltinTypeDeclBuilder &addConsumeMethod();
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 94a490a8f68dc..f95a647b5679f 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -3010,6 +3010,27 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
break;
}
+ case Builtin::BI__builtin_hlsl_resource_load_with_status: {
+   // Expected call shape: (resource handle, unsigned int index,
+   // unsigned int status). Any failed check rejects the call.
+   auto &Ctx = SemaRef.getASTContext();
+   if (SemaRef.checkArgCount(TheCall, 3))
+     return true;
+   if (CheckResourceHandle(&SemaRef, TheCall, 0))
+     return true;
+   if (CheckArgTypeMatches(&SemaRef, TheCall->getArg(1), Ctx.UnsignedIntTy))
+     return true;
+   if (CheckArgTypeMatches(&SemaRef, TheCall->getArg(2), Ctx.UnsignedIntTy))
+     return true;
+
+   // Type the call as a pointer to the handle's contained type in the
+   // hlsl_device address space, and mark the expression as an lvalue.
+   const auto *HandleTy =
+       TheCall->getArg(0)->getType()->castAs<HLSLAttributedResourceType>();
+   QualType ElemPtrTy = Ctx.getPointerType(Ctx.getAddrSpaceQualType(
+       HandleTy->getContainedType(), LangAS::hlsl_device));
+   TheCall->setType(ElemPtrTy);
+   TheCall->setValueKind(VK_LValue);
+
+   break;
+ }
+
case Builtin::BI__builtin_hlsl_resource_uninitializedhandle: {
if (SemaRef.checkArgCount(TheCall, 1) ||
CheckResourceHandle(&SemaRef, TheCall, 0))
diff --git a/clang/test/CodeGenHLSL/resources/StructuredBuffers-methods-lib.hlsl b/clang/test/CodeGenHLSL/resources/StructuredBuffers-methods-lib.hlsl
index 1f248d0560006..e692a14ecc168 100644
--- a/clang/test/CodeGenHLSL/resources/StructuredBuffers-methods-lib.hlsl
+++ b/clang/test/CodeGenHLSL/resources/StructuredBuffers-methods-lib.hlsl
@@ -104,6 +104,50 @@ export float TestLoad() {
// CHECK-NEXT: %[[VAL:.*]] = load float, ptr %[[PTR]]
// CHECK-NEXT: ret float %[[VAL]]
+// Exercises the Load(index, status) overload on RWStructuredBuffer and
+// StructuredBuffer. The checks below bind every SSA value to a FileCheck
+// variable instead of relying on the numbering of unnamed values.
+export float TestLoadWithStatus() {
+ uint s1;
+ uint s2;
+ float ret = RWSB1.Load(1, s1) + SB1.Load(2, s2);
+ ret += float(s1 + s2);
+ return ret;
+}
+
+// CHECK: define noundef nofpclass(nan inf) float @TestLoadWithStatus()()
+// CHECK: call {{.*}} float @hlsl::RWStructuredBuffer<float>::Load(unsigned int, unsigned int&)(ptr {{.*}} @RWSB1, i32 noundef 1, ptr noundef nonnull align 4 dereferenceable(4) %s1)
+// CHECK: call {{.*}} float @hlsl::StructuredBuffer<float>::Load(unsigned int, unsigned int&)(ptr {{.*}} @SB1, i32 noundef 2, ptr noundef nonnull align 4 dereferenceable(4) %s2)
+// CHECK: add
+// CHECK: ret float
+
+// CHECK: define {{.*}} float @hlsl::RWStructuredBuffer<float>::Load(unsigned int, unsigned int&)(ptr {{.*}} %this, i32 noundef %Index, ptr noundef nonnull align 4 dereferenceable(4) %Status)
+// CHECK: %__handle = getelementptr inbounds nuw %"class.hlsl::RWStructuredBuffer", ptr %{{.*}}, i32 0, i32 0
+// DXIL-NEXT: %[[HANDLE:.*]] = load target("dx.RawBuffer", float, 1, 0), ptr %__handle
+// CHECK-NEXT: %[[INDEX:.*]] = load i32, ptr %Index.addr
+// CHECK-NEXT: %[[STATUS:.*]] = load ptr, ptr %Status.addr,
+// DXIL-NEXT: %[[STRUCT:.*]] = call { ptr, i1 } @llvm.dx.resource.load.rawbuffer.p0.tdx.RawBuffer_f32_1_0t(target("dx.RawBuffer", float, 1, 0) %[[HANDLE]], i32 %[[INDEX]], i32 0)
+// CHECK-NEXT: %[[VALUE:.*]] = extractvalue { ptr, i1 } %[[STRUCT]], 0
+// CHECK-NEXT: %[[STATUS_TEMP:.*]] = extractvalue { ptr, i1 } %[[STRUCT]], 1
+// CHECK-NEXT: %[[STATUS_EXT:.*]] = zext i1 %[[STATUS_TEMP]] to i32
+// CHECK-NEXT: %[[STATUS_CHECKED:.*]] = call i1 @llvm.dx.check.access.fully.mapped(i32 %[[STATUS_EXT]])
+// CHECK-NEXT: %[[STATUS_REEXT:.*]] = zext i1 %[[STATUS_CHECKED]] to i32
+// CHECK-NEXT: store i32 %[[STATUS_REEXT]], ptr %[[STATUS]], align 4
+// CHECK-NEXT: %[[RETVAL:.*]] = load float, ptr %[[VALUE]]
+// CHECK-NEXT: ret float %[[RETVAL]]
+
+// CHECK: define {{.*}} float @hlsl::StructuredBuffer<float>::Load(unsigned int, unsigned int&)(ptr {{.*}} %this, i32 noundef %Index, ptr noundef nonnull align 4 dereferenceable(4) %Status)
+// CHECK: %__handle = getelementptr inbounds nuw %"class.hlsl::StructuredBuffer", ptr %{{.*}}, i32 0, i32 0
+// DXIL-NEXT: %[[HANDLE:.*]] = load target("dx.RawBuffer", float, 0, 0), ptr %__handle
+// CHECK-NEXT: %[[INDEX:.*]] = load i32, ptr %Index.addr
+// CHECK-NEXT: %[[STATUS_HANDLE:.*]] = load ptr, ptr %Status.addr,
+// DXIL-NEXT: %[[STRUCT:.*]] = call { ptr, i1 } @llvm.dx.resource.load.rawbuffer.p0.tdx.RawBuffer_f32_0_0t(target("dx.RawBuffer", float, 0, 0) %[[HANDLE]], i32 %[[INDEX]], i32 0)
+// CHECK-NEXT: %[[VALUE:.*]] = extractvalue { ptr, i1 } %[[STRUCT]], 0
+// CHECK-NEXT: %[[STATUS_TEMP:.*]] = extractvalue { ptr, i1 } %[[STRUCT]], 1
+// CHECK-NEXT: %[[STATUS_EXT:.*]] = zext i1 %[[STATUS_TEMP]] to i32
+// CHECK-NEXT: %[[STATUS_CHECKED:.*]] = call i1 @llvm.dx.check.access.fully.mapped(i32 %[[STATUS_EXT]])
+// CHECK-NEXT: %[[STATUS_REEXT:.*]] = zext i1 %[[STATUS_CHECKED]] to i32
+// CHECK-NEXT: store i32 %[[STATUS_REEXT]], ptr %[[STATUS_HANDLE]], align 4
+// CHECK-NEXT: %[[RETVAL:.*]] = load float, ptr %[[VALUE]]
+// CHECK-NEXT: ret float %[[RETVAL]]
+
export uint TestGetDimensions() {
uint dim1, dim2, dim3, stride1, stride2, stride3;
SB1.GetDimensions(dim1, stride1);
diff --git a/clang/test/CodeGenHLSL/resources/StructuredBuffers-methods-ps.hlsl b/clang/test/CodeGenHLSL/resources/StructuredBuffers-methods-ps.hlsl
index 25fa75965d686..77aaf281fbc39 100644
--- a/clang/test/CodeGenHLSL/resources/StructuredBuffers-methods-ps.hlsl
+++ b/clang/test/CodeGenHLSL/resources/StructuredBuffers-methods-ps.hlsl
@@ -65,6 +65,49 @@ export float TestLoad() {
// CHECK-NEXT: %[[VAL:.*]] = load <2 x i32>, ptr %[[BUFPTR]]
// CHECK-NEXT: ret <2 x i32> %[[VAL]]
+// Exercises the Load(index, status) overload on
+// RasterizerOrderedStructuredBuffer. The checks below bind every SSA value to
+// a FileCheck variable instead of relying on the numbering of unnamed values.
+export float TestLoadWithStatus() {
+ uint status;
+ uint status2;
+ float val = ROSB1.Load(10, status).x + ROSB2.Load(20, status2).x;
+ return val + float(status + status2);
+}
+
+// CHECK: define {{.*}} float @TestLoadWithStatus()()
+// CHECK: call {{.*}} float @hlsl::RasterizerOrderedStructuredBuffer<float>::Load(unsigned int, unsigned int&)(ptr {{.*}} @ROSB1, i32 noundef 10, ptr noundef nonnull align 4 dereferenceable(4) %status)
+// CHECK: call {{.*}} <2 x i32> @hlsl::RasterizerOrderedStructuredBuffer<int vector[2]>::Load(unsigned int, unsigned int&)(ptr {{.*}} @ROSB2, i32 noundef 20, ptr noundef nonnull align 4 dereferenceable(4) %status2)
+// CHECK: ret
+
+// CHECK: define {{.*}} float @hlsl::RasterizerOrderedStructuredBuffer<float>::Load(unsigned int, unsigned int&)(ptr {{.*}} %Index, ptr noundef nonnull align 4 dereferenceable(4) %Status)
+// CHECK: %__handle = getelementptr inbounds nuw %"class.hlsl::RasterizerOrderedStructuredBuffer", ptr {{.*}}, i32 0, i32 0
+// CHECK-NEXT: %[[HANDLE:.*]] = load target("dx.RawBuffer", float, 1, 1), ptr %__handle
+// CHECK-NEXT: %[[INDEX:.*]] = load i32, ptr %Index.addr
+// CHECK-NEXT: %[[STATUS:.*]] = load ptr, ptr %Status.addr,
+// DXIL-NEXT: %[[STRUCT:.*]] = call { ptr, i1 } @llvm.dx.resource.load.rawbuffer.p0.tdx.RawBuffer_f32_1_1t(target("dx.RawBuffer", float, 1, 1) %[[HANDLE]], i32 %[[INDEX]], i32 0)
+// CHECK-NEXT: %[[VALUE:.*]] = extractvalue { ptr, i1 } %[[STRUCT]], 0
+// CHECK-NEXT: %[[STATUS_TEMP:.*]] = extractvalue { ptr, i1 } %[[STRUCT]], 1
+// CHECK-NEXT: %[[STATUS_EXT:.*]] = zext i1 %[[STATUS_TEMP]] to i32
+// CHECK-NEXT: %[[STATUS_CHECKED:.*]] = call i1 @llvm.dx.check.access.fully.mapped(i32 %[[STATUS_EXT]])
+// CHECK-NEXT: %[[STATUS_REEXT:.*]] = zext i1 %[[STATUS_CHECKED]] to i32
+// CHECK-NEXT: store i32 %[[STATUS_REEXT]], ptr %[[STATUS]], align 4
+// CHECK-NEXT: %[[RETVAL:.*]] = load float, ptr %[[VALUE]]
+// CHECK-NEXT: ret float %[[RETVAL]]
+
+// CHECK: define {{.*}} <2 x i32> @hlsl::RasterizerOrderedStructuredBuffer<int vector[2]>::Load(unsigned int, unsigned int&)(ptr {{.*}} %Index, ptr noundef nonnull align 4 dereferenceable(4) %Status)
+// CHECK: %__handle = getelementptr inbounds nuw %"class.hlsl::RasterizerOrderedStructuredBuffer.0", ptr {{.*}}, i32 0, i32 0
+// CHECK-NEXT: %[[HANDLE:.*]] = load target("dx.RawBuffer", <2 x i32>, 1, 1), ptr %__handle
+// CHECK-NEXT: %[[INDEX:.*]] = load i32, ptr %Index.addr
+// CHECK-NEXT: %[[STATUS_HANDLE:.*]] = load ptr, ptr %Status.addr,
+// DXIL-NEXT: %[[STRUCT:.*]] = call { ptr, i1 } @llvm.dx.resource.load.rawbuffer.p0.tdx.RawBuffer_v2i32_1_1t(target("dx.RawBuffer", <2 x i32>, 1, 1) %[[HANDLE]], i32 %[[INDEX]], i32 0)
+// CHECK-NEXT: %[[VALUE:.*]] = extractvalue { ptr, i1 } %[[STRUCT]], 0
+// CHECK-NEXT: %[[STATUS_TEMP:.*]] = extractvalue { ptr, i1 } %[[STRUCT]], 1
+// CHECK-NEXT: %[[STATUS_EXT:.*]] = zext i1 %[[STATUS_TEMP]] to i32
+// CHECK-NEXT: %[[STATUS_CHECKED:.*]] = call i1 @llvm.dx.check.access.fully.mapped(i32 %[[STATUS_EXT]])
+// CHECK-NEXT: %[[STATUS_REEXT:.*]] = zext i1 %[[STATUS_CHECKED]] to i32
+// CHECK-NEXT: store i32 %[[STATUS_REEXT]], ptr %[[STATUS_HANDLE]], align 4
+// CHECK-NEXT: %[[RETVAL:.*]] = load float, ptr %[[VALUE]]
+// CHECK-NEXT: ret float %[[RETVAL]]
+
+
export uint TestGetDimensions() {
uint dim1, dim2, stride1, stride2;
ROSB1.GetDimensions(dim1, stride1);
diff --git a/clang/test/CodeGenHLSL/resources/TypedBuffers-methods.hlsl b/clang/test/CodeGenHLSL/resources/TypedBuffers-methods.hlsl
index fdc1ef08b7c2c..14607b58c67d9 100644
--- a/clang/test/CodeGenHLSL/resources/TypedBuffers-methods.hlsl
+++ b/clang/test/CodeGenHLSL/resources/TypedBuffers-methods.hlsl
@@ -38,6 +38,50 @@ export float TestLoad() {
// CHECK-NEXT: %[[VEC:.*]] = load <4 x i32>, ptr %[[PTR]]
// CHECK-NEXT: ret <4 x i32> %[[VEC]]
+// Exercises the Load(index, status) overload on Buffer and RWBuffer. The
+// checks below bind every SSA value to a FileCheck variable instead of
+// relying on the numbering of unnamed values.
+export float TestLoadWithStatus() {
+ uint s1;
+ uint s2;
+ float ret = Buf.Load(1, s1) + float(RWBuf.Load(2, s2).y);
+ ret += float(s1 + s2);
+ return ret;
+}
+
+// CHECK: define noundef nofpclass(nan inf) float @TestLoadWithStatus()()
+// CHECK: call {{.*}} float @hlsl::Buffer<float>::Load(unsigned int, unsigned int&)(ptr {{.*}} @Buf, i32 noundef 1, ptr noundef nonnull align 4 dereferenceable(4) %s1)
+// CHECK: call {{.*}} <4 x i32> @hlsl::RWBuffer<unsigned int vector[4]>::Load(unsigned int, unsigned int&)(ptr {{.*}} @RWBuf, i32 noundef 2, ptr noundef nonnull align 4 dereferenceable(4) %s2)
+// CHECK: add
+// CHECK: ret float
+
+// CHECK: define {{.*}} float @hlsl::Buffer<float>::Load(unsigned int, unsigned int&)(ptr {{.*}} %this, i32 noundef %Index, ptr noundef nonnull align 4 dereferenceable(4) %Status)
+// CHECK: %__handle = getelementptr inbounds nuw %"class.hlsl::Buffer", ptr %{{.*}}, i32 0, i32 0
+// DXIL-NEXT: %[[HANDLE:.*]] = load target("dx.TypedBuffer", float, 0, 0, 0), ptr %__handle
+// CHECK-NEXT: %[[INDEX:.*]] = load i32, ptr %Index.addr
+// CHECK-NEXT: %[[STATUS:.*]] = load ptr, ptr %Status.addr,
+// DXIL-NEXT: %[[STRUCT:.*]] = call { ptr, i1 } @llvm.dx.resource.load.typedbuffer.p0.tdx.TypedBuffer_f32_0_0_0t(target("dx.TypedBuffer", float, 0, 0, 0) %[[HANDLE]], i32 %[[INDEX]])
+// CHECK-NEXT: %[[VALUE:.*]] = extractvalue { ptr, i1 } %[[STRUCT]], 0
+// CHECK-NEXT: %[[STATUS_TEMP:.*]] = extractvalue { ptr, i1 } %[[STRUCT]], 1
+// CHECK-NEXT: %[[STATUS_EXT:.*]] = zext i1 %[[STATUS_TEMP]] to i32
+// CHECK-NEXT: %[[STATUS_CHECKED:.*]] = call i1 @llvm.dx.check.access.fully.mapped(i32 %[[STATUS_EXT]])
+// CHECK-NEXT: %[[STATUS_REEXT:.*]] = zext i1 %[[STATUS_CHECKED]] to i32
+// CHECK-NEXT: store i32 %[[STATUS_REEXT]], ptr %[[STATUS]], align 4
+// CHECK-NEXT: %[[RETVAL:.*]] = load float, ptr %[[VALUE]]
+// CHECK-NEXT: ret float %[[RETVAL]]
+
+// CHECK: define {{.*}} <4 x i32> @hlsl::RWBuffer<unsigned int vector[4]>::Load(unsigned int, unsigned int&)(ptr {{.*}} %this, i32 noundef %Index, ptr noundef nonnull align 4 dereferenceable(4) %Status)
+// CHECK: %__handle = getelementptr inbounds nuw %"class.hlsl::RWBuffer", ptr %{{.*}}, i32 0, i32 0
+// DXIL-NEXT: %[[HANDLE:.*]] = load target("dx.TypedBuffer", <4 x i32>, 1, 0, 0), ptr %__handle
+// CHECK-NEXT: %[[INDEX:.*]] = load i32, ptr %Index.addr
+// CHECK-NEXT: %[[STATUS_HANDLE:.*]] = load ptr, ptr %Status.addr,
+// DXIL-NEXT: %[[STRUCT:.*]] = call { ptr, i1 } @llvm.dx.resource.load.typedbuffer.p0.tdx.TypedBuffer_v4i32_1_0_0t(target("dx.TypedBuffer", <4 x i32>, 1, 0, 0) %[[HANDLE]], i32 %[[INDEX]])
+// CHECK-NEXT: %[[VALUE:.*]] = extractvalue { ptr, i1 } %[[STRUCT]], 0
+// CHECK-NEXT: %[[STATUS_TEMP:.*]] = extractvalue { ptr, i1 } %[[STRUCT]], 1
+// CHECK-NEXT: %[[STATUS_EXT:.*]] = zext i1 %[[STATUS_TEMP]] to i32
+// CHECK-NEXT: %[[STATUS_CHECKED:.*]] = call i1 @llvm.dx.check.access.fully.mapped(i32 %[[STATUS_EXT]])
+// CHECK-NEXT: %[[STATUS_REEXT:.*]] = zext i1 %[[STATUS_CHECKED]] to i32
+// CHECK-NEXT: store i32 %[[STATUS_REEXT]], ptr %[[STATUS_HANDLE]], align 4
+// CHECK-NEXT: %[[RETVAL:.*]] = load <4 x i32>, ptr %[[VALUE]]
+// CHECK-NEXT: ret <4 x i32> %[[RETVAL]]
+
export uint TestGetDimensions() {
uint dim1, dim2;
Buf.GetDimensions(dim1);
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index d6b85630eb979..6959978678540 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -40,6 +40,9 @@ def int_dx_resource_getpointer
: DefaultAttrsIntrinsic<[llvm_anyptr_ty], [llvm_any_ty, llvm_i32_ty],
[IntrNoMem]>;
+// Takes an i32 and returns an i1; declared IntrNoMem. The load-with-status
+// builtin lowering in CGHLSLBuiltins.cpp calls it to produce the status bit.
+def int_dx_check_access_fully_mapped
+ : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_i32_ty], [IntrNoMem]>;
+
def int_...
[truncated]
|
|
@llvm/pr-subscribers-clang-codegen Author: Joshua Batista (bob80905). Changes: This PR adds a new load overload for resources that takes an additional by-reference parameter, status. It sets the status parameter to 1 or 0, depending on whether or not the resource access was mapped. CheckAccessFullyMapped is also added as an intrinsic and is called when producing this status bit. Patch is 20.86 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/166449.diff. 11 files affected:
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 2b400b012d6ed..f2b4c54a5ba6b 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -4934,6 +4934,12 @@ def HLSLResourceGetPointer : LangBuiltin<"HLSL_LANG"> {
let Prototype = "void(...)";
}
+// Builtin behind the status-reporting resource load. The prototype is left
+// as "void(...)"; the argument count, the argument types and the type of the
+// call expression are set by the custom check for this builtin in
+// SemaHLSL.cpp.
+def HLSLResourceLoadWithStatus : LangBuiltin<"HLSL_LANG"> {
+ let Spellings = ["__builtin_hlsl_resource_load_with_status"];
+ let Attributes = [NoThrow];
+ let Prototype = "void(...)";
+}
+
def HLSLResourceUninitializedHandle : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_resource_uninitializedhandle"];
let Attributes = [NoThrow];
diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
index fbf4a5722caed..ff3d906d4bf11 100644
--- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp
+++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
@@ -353,6 +353,64 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
RetTy, CGM.getHLSLRuntime().getCreateResourceGetPointerIntrinsic(),
ArrayRef<Value *>{HandleOp, IndexOp});
}
+ case Builtin::BI__builtin_hlsl_resource_load_with_status: {
+   // Lowers the status-reporting resource load. Argument 0 is the resource
+   // handle, argument 1 the element index, and argument 2 the caller's status
+   // variable, which is written through its address. The value extracted from
+   // the load intrinsic's result is what this case returns.
+   Value *HandleOp = EmitScalarExpr(E->getArg(0));
+   Value *IndexOp = EmitScalarExpr(E->getArg(1));
+   Address StatusAddr = EmitLValue(E->getArg(2)).getAddress();
+
+   const HLSLAttributedResourceType *RT =
+       E->getArg(0)->getType()->getAs<HLSLAttributedResourceType>();
+   assert(RT && "Expected a resource type as first parameter");
+   const bool IsRawBuffer = RT->getAttrs().RawBuffer;
+
+   // The intrinsic result is modelled as a { data, i1 status } struct.
+   llvm::Type *ResultTy = llvm::StructType::get(
+       Builder.getContext(), {ConvertType(E->getType()), Builder.getInt1Ty()});
+
+   // Raw buffers use a different intrinsic that takes one extra operand; a
+   // constant zero offset is passed for it.
+   SmallVector<Value *, 3> Args = {HandleOp, IndexOp};
+   Intrinsic::ID IntrID = llvm::Intrinsic::dx_resource_load_typedbuffer;
+   if (IsRawBuffer) {
+     IntrID = llvm::Intrinsic::dx_resource_load_rawbuffer;
+     Args.push_back(Builder.getInt32(0));
+   }
+
+   Value *Result =
+       Builder.CreateIntrinsic(ResultTy, IntrID, Args, {}, "ld.struct");
+   Value *LoadedValue = Builder.CreateExtractValue(Result, {0}, "ld.value");
+   Value *RawStatus = Builder.CreateExtractValue(Result, {1}, "ld.status");
+
+   // dx.check.access.fully.mapped takes an i32 and yields an i1, so the status
+   // bit is widened before the call.
+   // NOTE(review): the i1 returned by the load intrinsic may already be the
+   // fully-mapped bit, in which case this extra check is redundant -- confirm
+   // against the DirectX resource intrinsic documentation.
+   llvm::Type *Int32Ty = Builder.getInt32Ty();
+   Value *StatusArg = Builder.CreateZExt(RawStatus, Int32Ty, "ld.status.i32");
+   llvm::Function *CheckFn =
+       CGM.getIntrinsic(llvm::Intrinsic::dx_check_access_fully_mapped);
+   Value *Checked =
+       Builder.CreateCall(CheckFn, {StatusArg}, "ld.status.checked");
+
+   // The user's status variable is a 32-bit uint, so widen the checked bit
+   // again before storing it.
+   Builder.CreateStore(Builder.CreateZExt(Checked, Int32Ty, "ld.status.ext"),
+                       StatusAddr);
+   return LoadedValue;
+ }
case Builtin::BI__builtin_hlsl_resource_uninitializedhandle: {
llvm::Type *HandleTy = CGM.getTypes().ConvertType(E->getType());
return llvm::PoisonValue::get(HandleTy);
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h
index d35df524fdc84..f6c654bd61ff5 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.h
+++ b/clang/lib/CodeGen/CGHLSLRuntime.h
@@ -126,6 +126,9 @@ class CGHLSLRuntime {
GENERATE_HLSL_INTRINSIC_FUNCTION(CreateResourceGetPointer,
resource_getpointer)
+
+ // Registers the typed-buffer resource load intrinsic with the HLSL runtime
+ // (presumably exposed as getCreateResourceLoadTypedBufferIntrinsic(), by
+ // analogy with the resource_getpointer entry).
+ // NOTE(review): the load-with-status lowering visible in this patch names
+ // llvm::Intrinsic::dx_resource_load_typedbuffer directly instead of going
+ // through this accessor -- confirm the accessor has a user.
+ GENERATE_HLSL_INTRINSIC_FUNCTION(CreateResourceLoadTypedBuffer,
+ resource_load_typedbuffer)
GENERATE_HLSL_INTRINSIC_FUNCTION(CreateHandleFromBinding,
resource_handlefrombinding)
GENERATE_HLSL_INTRINSIC_FUNCTION(CreateHandleFromImplicitBinding,
diff --git a/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp b/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp
index 066acf6f01a90..bdbc2d4ce87b3 100644
--- a/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp
+++ b/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp
@@ -1140,6 +1140,7 @@ BuiltinTypeDeclBuilder &BuiltinTypeDeclBuilder::addLoadMethods() {
DeclarationName Load(&II);
// TODO: We also need versions with status for CheckAccessFullyMapped.
addHandleAccessFunction(Load, /*IsConst=*/false, /*IsRef=*/false);
+ addLoadWithStatusFunction(Load, /*IsConst=*/false, /*IsRef=*/false);
return *this;
}
@@ -1232,6 +1233,40 @@ BuiltinTypeDeclBuilder &BuiltinTypeDeclBuilder::addDecrementCounterMethod() {
.finalize();
}
+/// Adds a method called \p Name taking `(unsigned int Index, unsigned int
+/// &Status)` to the record being built. The method body forwards the handle
+/// and both parameters to __builtin_hlsl_resource_load_with_status and then
+/// dereferences the element pointer that call produces.
+///
+/// \param Name    name of the method to add.
+/// \param IsConst const-qualifies the result type; also forwarded to the
+///                method builder.
+/// \param IsRef   when true the result is an lvalue reference to the element
+///                type in the hlsl_device address space, otherwise the plain
+///                element type.
+BuiltinTypeDeclBuilder &
+BuiltinTypeDeclBuilder::addLoadWithStatusFunction(DeclarationName &Name,
+                                                  bool IsConst, bool IsRef) {
+  assert(!Record->isCompleteDefinition() && "record is already complete");
+  using PH = BuiltinTypeMethodBuilder::PlaceHolder;
+  ASTContext &Ctx = SemaRef.getASTContext();
+
+  QualType ElementTy = getHandleElementType();
+  QualType DeviceElementTy =
+      Ctx.getAddrSpaceQualType(ElementTy, LangAS::hlsl_device);
+
+  // Pick the base result type first, then layer const and the reference on
+  // top, so both the by-value and by-reference forms share one path.
+  QualType ResultTy = IsRef ? DeviceElementTy : ElementTy;
+  if (IsConst)
+    ResultTy.addConst();
+  if (IsRef)
+    ResultTy = Ctx.getLValueReferenceType(ResultTy);
+
+  return BuiltinTypeMethodBuilder(*this, Name, ResultTy, IsConst)
+      .addParam("Index", Ctx.UnsignedIntTy)
+      .addParam("Status", Ctx.getLValueReferenceType(Ctx.UnsignedIntTy))
+      .callBuiltin("__builtin_hlsl_resource_load_with_status",
+                   Ctx.getPointerType(DeviceElementTy), PH::Handle, PH::_0,
+                   PH::_1)
+      .dereference(PH::LastStmt)
+      .finalize();
+}
+
BuiltinTypeDeclBuilder &
BuiltinTypeDeclBuilder::addHandleAccessFunction(DeclarationName &Name,
bool IsConst, bool IsRef) {
diff --git a/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.h b/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.h
index 95e3a6c4fb2f1..4941b8b7952a2 100644
--- a/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.h
+++ b/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.h
@@ -91,6 +91,8 @@ class BuiltinTypeDeclBuilder {
BuiltinTypeDeclBuilder &addDecrementCounterMethod();
BuiltinTypeDeclBuilder &addHandleAccessFunction(DeclarationName &Name,
bool IsConst, bool IsRef);
+ /// Adds a load method named \p Name that takes an index and a by-reference
+ /// unsigned int status parameter; defined in HLSLBuiltinTypeDeclBuilder.cpp.
+ BuiltinTypeDeclBuilder &addLoadWithStatusFunction(DeclarationName &Name,
+ bool IsConst, bool IsRef);
BuiltinTypeDeclBuilder &addAppendMethod();
BuiltinTypeDeclBuilder &addConsumeMethod();
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 94a490a8f68dc..f95a647b5679f 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -3010,6 +3010,27 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
break;
}
+ case Builtin::BI__builtin_hlsl_resource_load_with_status: {
+   // Expected call shape: (resource handle, unsigned int index,
+   // unsigned int status). Any failed check rejects the call.
+   auto &Ctx = SemaRef.getASTContext();
+   if (SemaRef.checkArgCount(TheCall, 3))
+     return true;
+   if (CheckResourceHandle(&SemaRef, TheCall, 0))
+     return true;
+   if (CheckArgTypeMatches(&SemaRef, TheCall->getArg(1), Ctx.UnsignedIntTy))
+     return true;
+   if (CheckArgTypeMatches(&SemaRef, TheCall->getArg(2), Ctx.UnsignedIntTy))
+     return true;
+
+   // Type the call as a pointer to the handle's contained type in the
+   // hlsl_device address space, and mark the expression as an lvalue.
+   const auto *HandleTy =
+       TheCall->getArg(0)->getType()->castAs<HLSLAttributedResourceType>();
+   QualType ElemPtrTy = Ctx.getPointerType(Ctx.getAddrSpaceQualType(
+       HandleTy->getContainedType(), LangAS::hlsl_device));
+   TheCall->setType(ElemPtrTy);
+   TheCall->setValueKind(VK_LValue);
+
+   break;
+ }
+
case Builtin::BI__builtin_hlsl_resource_uninitializedhandle: {
if (SemaRef.checkArgCount(TheCall, 1) ||
CheckResourceHandle(&SemaRef, TheCall, 0))
diff --git a/clang/test/CodeGenHLSL/resources/StructuredBuffers-methods-lib.hlsl b/clang/test/CodeGenHLSL/resources/StructuredBuffers-methods-lib.hlsl
index 1f248d0560006..e692a14ecc168 100644
--- a/clang/test/CodeGenHLSL/resources/StructuredBuffers-methods-lib.hlsl
+++ b/clang/test/CodeGenHLSL/resources/StructuredBuffers-methods-lib.hlsl
@@ -104,6 +104,50 @@ export float TestLoad() {
// CHECK-NEXT: %[[VAL:.*]] = load float, ptr %[[PTR]]
// CHECK-NEXT: ret float %[[VAL]]
+// Exercises the Load(index, status) overload on RWStructuredBuffer and
+// StructuredBuffer. The checks below bind every SSA value to a FileCheck
+// variable instead of relying on the numbering of unnamed values.
+export float TestLoadWithStatus() {
+ uint s1;
+ uint s2;
+ float ret = RWSB1.Load(1, s1) + SB1.Load(2, s2);
+ ret += float(s1 + s2);
+ return ret;
+}
+
+// CHECK: define noundef nofpclass(nan inf) float @TestLoadWithStatus()()
+// CHECK: call {{.*}} float @hlsl::RWStructuredBuffer<float>::Load(unsigned int, unsigned int&)(ptr {{.*}} @RWSB1, i32 noundef 1, ptr noundef nonnull align 4 dereferenceable(4) %s1)
+// CHECK: call {{.*}} float @hlsl::StructuredBuffer<float>::Load(unsigned int, unsigned int&)(ptr {{.*}} @SB1, i32 noundef 2, ptr noundef nonnull align 4 dereferenceable(4) %s2)
+// CHECK: add
+// CHECK: ret float
+
+// CHECK: define {{.*}} float @hlsl::RWStructuredBuffer<float>::Load(unsigned int, unsigned int&)(ptr {{.*}} %this, i32 noundef %Index, ptr noundef nonnull align 4 dereferenceable(4) %Status)
+// CHECK: %__handle = getelementptr inbounds nuw %"class.hlsl::RWStructuredBuffer", ptr %{{.*}}, i32 0, i32 0
+// DXIL-NEXT: %[[HANDLE:.*]] = load target("dx.RawBuffer", float, 1, 0), ptr %__handle
+// CHECK-NEXT: %[[INDEX:.*]] = load i32, ptr %Index.addr
+// CHECK-NEXT: %[[STATUS:.*]] = load ptr, ptr %Status.addr,
+// DXIL-NEXT: %[[STRUCT:.*]] = call { ptr, i1 } @llvm.dx.resource.load.rawbuffer.p0.tdx.RawBuffer_f32_1_0t(target("dx.RawBuffer", float, 1, 0) %[[HANDLE]], i32 %[[INDEX]], i32 0)
+// CHECK-NEXT: %[[VALUE:.*]] = extractvalue { ptr, i1 } %[[STRUCT]], 0
+// CHECK-NEXT: %[[STATUS_TEMP:.*]] = extractvalue { ptr, i1 } %[[STRUCT]], 1
+// CHECK-NEXT: %[[STATUS_EXT:.*]] = zext i1 %[[STATUS_TEMP]] to i32
+// CHECK-NEXT: %[[STATUS_CHECKED:.*]] = call i1 @llvm.dx.check.access.fully.mapped(i32 %[[STATUS_EXT]])
+// CHECK-NEXT: %[[STATUS_REEXT:.*]] = zext i1 %[[STATUS_CHECKED]] to i32
+// CHECK-NEXT: store i32 %[[STATUS_REEXT]], ptr %[[STATUS]], align 4
+// CHECK-NEXT: %[[RETVAL:.*]] = load float, ptr %[[VALUE]]
+// CHECK-NEXT: ret float %[[RETVAL]]
+
+// CHECK: define {{.*}} float @hlsl::StructuredBuffer<float>::Load(unsigned int, unsigned int&)(ptr {{.*}} %this, i32 noundef %Index, ptr noundef nonnull align 4 dereferenceable(4) %Status)
+// CHECK: %__handle = getelementptr inbounds nuw %"class.hlsl::StructuredBuffer", ptr %{{.*}}, i32 0, i32 0
+// DXIL-NEXT: %[[HANDLE:.*]] = load target("dx.RawBuffer", float, 0, 0), ptr %__handle
+// CHECK-NEXT: %[[INDEX:.*]] = load i32, ptr %Index.addr
+// CHECK-NEXT: %[[STATUS_HANDLE:.*]] = load ptr, ptr %Status.addr,
+// DXIL-NEXT: %[[STRUCT:.*]] = call { ptr, i1 } @llvm.dx.resource.load.rawbuffer.p0.tdx.RawBuffer_f32_0_0t(target("dx.RawBuffer", float, 0, 0) %[[HANDLE]], i32 %[[INDEX]], i32 0)
+// CHECK-NEXT: %[[VALUE:.*]] = extractvalue { ptr, i1 } %[[STRUCT]], 0
+// CHECK-NEXT: %[[STATUS_TEMP:.*]] = extractvalue { ptr, i1 } %[[STRUCT]], 1
+// CHECK-NEXT: %[[STATUS_EXT:.*]] = zext i1 %[[STATUS_TEMP]] to i32
+// CHECK-NEXT: %[[STATUS_CHECKED:.*]] = call i1 @llvm.dx.check.access.fully.mapped(i32 %[[STATUS_EXT]])
+// CHECK-NEXT: %[[STATUS_REEXT:.*]] = zext i1 %[[STATUS_CHECKED]] to i32
+// CHECK-NEXT: store i32 %[[STATUS_REEXT]], ptr %[[STATUS_HANDLE]], align 4
+// CHECK-NEXT: %[[RETVAL:.*]] = load float, ptr %[[VALUE]]
+// CHECK-NEXT: ret float %[[RETVAL]]
+
export uint TestGetDimensions() {
uint dim1, dim2, dim3, stride1, stride2, stride3;
SB1.GetDimensions(dim1, stride1);
diff --git a/clang/test/CodeGenHLSL/resources/StructuredBuffers-methods-ps.hlsl b/clang/test/CodeGenHLSL/resources/StructuredBuffers-methods-ps.hlsl
index 25fa75965d686..77aaf281fbc39 100644
--- a/clang/test/CodeGenHLSL/resources/StructuredBuffers-methods-ps.hlsl
+++ b/clang/test/CodeGenHLSL/resources/StructuredBuffers-methods-ps.hlsl
@@ -65,6 +65,49 @@ export float TestLoad() {
// CHECK-NEXT: %[[VAL:.*]] = load <2 x i32>, ptr %[[BUFPTR]]
// CHECK-NEXT: ret <2 x i32> %[[VAL]]
+export float TestLoadWithStatus() {
+ uint status;
+ uint status2;
+ float val = ROSB1.Load(10, status).x + ROSB2.Load(20, status2).x;
+ return val + float(status + status2);
+}
+
+// CHECK: define {{.*}} float @TestLoadWithStatus()()
+// CHECK: call {{.*}} float @hlsl::RasterizerOrderedStructuredBuffer<float>::Load(unsigned int, unsigned int&)(ptr {{.*}} @ROSB1, i32 noundef 10, ptr noundef nonnull align 4 dereferenceable(4) %status)
+// CHECK: call {{.*}} <2 x i32> @hlsl::RasterizerOrderedStructuredBuffer<int vector[2]>::Load(unsigned int, unsigned int&)(ptr {{.*}} @ROSB2, i32 noundef 20, ptr noundef nonnull align 4 dereferenceable(4) %status2)
+// CHECK: ret
+
+// CHECK: define {{.*}} float @hlsl::RasterizerOrderedStructuredBuffer<float>::Load(unsigned int, unsigned int&)(ptr {{.*}} %Index, ptr noundef nonnull align 4 dereferenceable(4) %Status)
+// CHECK: %__handle = getelementptr inbounds nuw %"class.hlsl::RasterizerOrderedStructuredBuffer", ptr {{.*}}, i32 0, i32 0
+// CHECK-NEXT: %[[HANDLE:.*]] = load target("dx.RawBuffer", float, 1, 1), ptr %__handle
+// CHECK-NEXT: %[[INDEX:.*]] = load i32, ptr %Index.addr
+// CHECK-NEXT: %[[STATUS:.*]] = load ptr, ptr %Status.addr,
+// DXIL-NEXT: %[[STRUCT:.*]] = call { ptr, i1 } @llvm.dx.resource.load.rawbuffer.p0.tdx.RawBuffer_f32_1_1t(target("dx.RawBuffer", float, 1, 1) %[[HANDLE]], i32 %[[INDEX]], i32 0)
+// CHECK-NEXT: %[[VALUE:.*]] = extractvalue { ptr, i1 } %[[STRUCT]], 0
+// CHECK-NEXT: %[[STATUS_TEMP:.*]] = extractvalue { ptr, i1 } %[[STRUCT]], 1
+// CHECK-NEXT: %[[STATUS_EXT:.*]] = zext i1 %[[STATUS_TEMP]] to i32
+// CHECK-NEXT: %[[STATUS_CHECKED:.*]] = call i1 @llvm.dx.check.access.fully.mapped(i32 %[[STATUS_EXT:.*]])
+// CHECK-NEXT: %[[STATUS_REEXT:.*]] = zext i1 %[[STATUS_CHECKED]] to i32
+// CHECK-NEXT: store i32 %[[STATUS_REEXT]], ptr %2, align 4
+// CHECK-NEXT: %[[RETVAL:.*]] = load float, ptr %[[VALUE]]
+// CHECK-NEXT: ret float %[[RETVAL]]
+
+// CHECK: define {{.*}} <2 x i32> @hlsl::RasterizerOrderedStructuredBuffer<int vector[2]>::Load(unsigned int, unsigned int&)(ptr {{.*}} %Index, ptr noundef nonnull align 4 dereferenceable(4) %Status)
+// CHECK: %__handle = getelementptr inbounds nuw %"class.hlsl::RasterizerOrderedStructuredBuffer.0", ptr {{.*}}, i32 0, i32 0
+// CHECK-NEXT: %[[HANDLE:.*]] = load target("dx.RawBuffer", <2 x i32>, 1, 1), ptr %__handle
+// CHECK-NEXT: %[[INDEX:.*]] = load i32, ptr %Index.addr
+// CHECK-NEXT: %[[STATUS_HANDLE:.*]] = load ptr, ptr %Status.addr, align 4, !nonnull !3, !align !4
+// DXIL-NEXT: %[[STRUCT:.*]] = call { ptr, i1 } @llvm.dx.resource.load.rawbuffer.p0.tdx.RawBuffer_v2i32_1_1t(target("dx.RawBuffer", <2 x i32>, 1, 1) %0, i32 %1, i32 0)
+// CHECK-NEXT: %[[VALUE:.*]] = extractvalue { ptr, i1 } %[[STRUCT]], 0
+// CHECK-NEXT: %[[STATUS_TEMP:.*]] = extractvalue { ptr, i1 } %[[STRUCT]], 1
+// CHECK-NEXT: %[[STATUS_EXT:.*]] = zext i1 %[[STATUS_TEMP]] to i32
+// CHECK-NEXT: %[[STATUS_CHECKED:.*]] = call i1 @llvm.dx.check.access.fully.mapped(i32 %[[STATUS_EXT:.*]])
+// CHECK-NEXT: %[[STATUS_REEXT:.*]] = zext i1 %[[STATUS_CHECKED]] to i32
+// CHECK-NEXT: store i32 %[[STATUS_REEXT]], ptr %2, align 4
+// CHECK-NEXT: %[[RETVAL:.*]] = load float, ptr %[[VALUE]]
+// CHECK-NEXT: ret float %[[RETVAL]]
+
+
export uint TestGetDimensions() {
uint dim1, dim2, stride1, stride2;
ROSB1.GetDimensions(dim1, stride1);
diff --git a/clang/test/CodeGenHLSL/resources/TypedBuffers-methods.hlsl b/clang/test/CodeGenHLSL/resources/TypedBuffers-methods.hlsl
index fdc1ef08b7c2c..14607b58c67d9 100644
--- a/clang/test/CodeGenHLSL/resources/TypedBuffers-methods.hlsl
+++ b/clang/test/CodeGenHLSL/resources/TypedBuffers-methods.hlsl
@@ -38,6 +38,50 @@ export float TestLoad() {
// CHECK-NEXT: %[[VEC:.*]] = load <4 x i32>, ptr %[[PTR]]
// CHECK-NEXT: ret <4 x i32> %[[VEC]]
+export float TestLoadWithStatus() {
+ uint s1;
+ uint s2;
+ float ret = Buf.Load(1, s1) + float(RWBuf.Load(2, s2).y);
+ ret += float(s1 + s2);
+ return ret;
+}
+
+// CHECK: define noundef nofpclass(nan inf) float @TestLoadWithStatus()()
+// CHECK: call {{.*}} float @hlsl::Buffer<float>::Load(unsigned int, unsigned int&)(ptr {{.*}} @Buf, i32 noundef 1, ptr noundef nonnull align 4 dereferenceable(4) %s1)
+// CHECK: call {{.*}} <4 x i32> @hlsl::RWBuffer<unsigned int vector[4]>::Load(unsigned int, unsigned int&)(ptr {{.*}} @RWBuf, i32 noundef 2, ptr noundef nonnull align 4 dereferenceable(4) %s2)
+// CHECK: add
+// CHECK: ret float
+
+// CHECK: define {{.*}} float @hlsl::Buffer<float>::Load(unsigned int, unsigned int&)(ptr {{.*}} %this, i32 noundef %Index, ptr noundef nonnull align 4 dereferenceable(4) %Status)
+// CHECK: %__handle = getelementptr inbounds nuw %"class.hlsl::Buffer", ptr %{{.*}}, i32 0, i32 0
+// DXIL-NEXT: %[[HANDLE:.*]] = load target("dx.TypedBuffer", float, 0, 0, 0), ptr %__handle
+// CHECK-NEXT: %[[INDEX:.*]] = load i32, ptr %Index.addr
+// CHECK-NEXT: %[[STATUS:.*]] = load ptr, ptr %Status.addr,
+// DXIL-NEXT: %[[STRUCT:.*]] = call { ptr, i1 } @llvm.dx.resource.load.typedbuffer.p0.tdx.TypedBuffer_f32_0_0_0t(target("dx.TypedBuffer", float, 0, 0, 0) %[[HANDLE]], i32 %[[INDEX]])
+// CHECK-NEXT: %[[VALUE:.*]] = extractvalue { ptr, i1 } %[[STRUCT]], 0
+// CHECK-NEXT: %[[STATUS_TEMP:.*]] = extractvalue { ptr, i1 } %[[STRUCT]], 1
+// CHECK-NEXT: %[[STATUS_EXT:.*]] = zext i1 %[[STATUS_TEMP]] to i32
+// CHECK-NEXT: %[[STATUS_CHECKED:.*]] = call i1 @llvm.dx.check.access.fully.mapped(i32 %[[STATUS_EXT:.*]])
+// CHECK-NEXT: %[[STATUS_REEXT:.*]] = zext i1 %[[STATUS_CHECKED]] to i32
+// CHECK-NEXT: store i32 %[[STATUS_REEXT]], ptr %2, align 4
+// CHECK-NEXT: %[[RETVAL:.*]] = load float, ptr %[[VALUE]]
+// CHECK-NEXT: ret float %[[RETVAL]]
+
+// CHECK: define {{.*}} <4 x i32> @hlsl::RWBuffer<unsigned int vector[4]>::Load(unsigned int, unsigned int&)(ptr {{.*}} %this, i32 noundef %Index, ptr noundef nonnull align 4 dereferenceable(4) %Status)
+// CHECK: %__handle = getelementptr inbounds nuw %"class.hlsl::RWBuffer", ptr %{{.*}}, i32 0, i32 0
+// DXIL-NEXT: %[[HANDLE:.*]] = load target("dx.TypedBuffer", <4 x i32>, 1, 0, 0), ptr %__handle
+// CHECK-NEXT: %[[INDEX:.*]] = load i32, ptr %Index.addr
+// CHECK-NEXT: %[[STATUS_HANDLE:.*]] = load ptr, ptr %Status.addr, align 4, !nonnull !3, !align !4
+// DXIL-NEXT: %[[STRUCT:.*]] = call { ptr, i1 } @llvm.dx.resource.load.typedbuffer.p0.tdx.TypedBuffer_v4i32_1_0_0t(target("dx.TypedBuffer", <4 x i32>, 1, 0, 0) %0, i32 %1)
+// CHECK-NEXT: %[[VALUE:.*]] = extractvalue { ptr, i1 } %[[STRUCT]], 0
+// CHECK-NEXT: %[[STATUS_TEMP:.*]] = extractvalue { ptr, i1 } %[[STRUCT]], 1
+// CHECK-NEXT: %[[STATUS_EXT:.*]] = zext i1 %[[STATUS_TEMP]] to i32
+// CHECK-NEXT: %[[STATUS_CHECKED:.*]] = call i1 @llvm.dx.check.access.fully.mapped(i32 %[[STATUS_EXT:.*]])
+// CHECK-NEXT: %[[STATUS_REEXT:.*]] = zext i1 %[[STATUS_CHECKED]] to i32
+// CHECK-NEXT: store i32 %[[STATUS_REEXT]], ptr %2, align 4
+// CHECK-NEXT: %[[RETVAL:.*]] = load <4 x i32>, ptr %[[VALUE]]
+// CHECK-NEXT: ret <4 x i32> %[[RETVAL]]
+
export uint TestGetDimensions() {
uint dim1, dim2;
Buf.GetDimensions(dim1);
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index d6b85630eb979..6959978678540 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -40,6 +40,9 @@ def int_dx_resource_getpointer
: DefaultAttrsIntrinsic<[llvm_anyptr_ty], [llvm_any_ty, llvm_i32_ty],
[IntrNoMem]>;
+def int_dx_check_access_fully_mapped
+ : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_i32_ty], [IntrNoMem]>;
+
def int_...
[truncated]
|
3b0f1fa to
ec4e3a9
Compare
| def int_spv_resource_load_with_status | ||
| : DefaultAttrsIntrinsic<[llvm_anyptr_ty], [llvm_any_ty, llvm_i32_ty, llvm_i32_ty], | ||
| [IntrNoMem]>; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I opened #166954 to track implementing this in the SPIR-V backend.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Since we aren't actually generating this ever, and the logic for DirectX has special handling depending on the resource type, it's probably best not to define this intrinsic at this point. @bob80905 was there an existing issue to support Load(int, out uint) and CheckAccessFullyMapped in SPIR-V, or should we use #166954 (in which case we need to add some detail there)?
| def int_dx_resource_load_with_status | ||
| : DefaultAttrsIntrinsic<[llvm_anyptr_ty], [llvm_any_ty, llvm_i32_ty, llvm_i32_ty], | ||
| [IntrNoMem]>; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We never generate this.
| def int_spv_resource_load_with_status | ||
| : DefaultAttrsIntrinsic<[llvm_anyptr_ty], [llvm_any_ty, llvm_i32_ty, llvm_i32_ty], | ||
| [IntrNoMem]>; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Since we aren't actually generating this ever, and the logic for DirectX has special handling depending on the resource type, it's probably best not to define this intrinsic at this point. @bob80905 was there an existing issue to support Load(int, out uint) and CheckAccessFullyMapped in SPIR-V, or should we use #166954 (in which case we need to add some detail there)?
| [[nodiscard]] bool lowerLoadWithStatus(Function &F) { | ||
| // These should have already been handled in DXILResourceAccess, so we can | ||
| // just clean up the dead prototype. | ||
| assert(F.user_empty() && "getpointer operations should have been removed"); | ||
| F.eraseFromParent(); | ||
| return false; | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This doesn't make sense - this isn't a getpointer operation and it wouldn't be removed. Probably a vestige from an earlier version when you were creating a separate intrinsic that can be removed at this point.
| // This is our proof that the module requires TiledResources | ||
| // to be set, as if check access fully mapped was used. | ||
| bool checkIfStatusIsExtracted(const Instruction &I) { | ||
| // Iterate over all uses of the instruction |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Comments that just state what reading the next line of code tells you aren't terribly helpful. For comments to be helpful they should either describe what's going on at a higher level or explain why we're doing something.
| // instruction is ever extracted. | ||
| // This is our proof that the module requires TiledResources | ||
| // to be set, as if check access fully mapped was used. | ||
| bool checkIfStatusIsExtracted(const Instruction &I) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Might be worth asserting that I has an appropriate intrinsic ID to catch if someone accidentally calls this function on some arbitrary instruction.
|
|
||
| // Check if the user is an ExtractValue instruction | ||
| if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(UserInst)) { | ||
| // ExtractValueInst has a list of indices; check if it extracts index 1 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Better to explain what we specifically expect, like "Resource load operations return a {result, status} pair - check if we extract the status"
clang/lib/CodeGen/CGHLSLBuiltins.cpp
Outdated
| Args.push_back(IndexOp); | ||
|
|
||
| if (RT->getAttrs().RawBuffer) { | ||
| Args.push_back(Builder.getInt32(0)); // dummy offset |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it'd be clearer to introduce an extra variable to say what this argument is, instead of this comment. That is
| Args.push_back(Builder.getInt32(0)); // dummy offset | |
| Value *Offset = Builder.getInt32(0); | |
| Args.push_back(Offset); |
clang/lib/CodeGen/CGHLSLBuiltins.cpp
Outdated
| // Call the intrinsic (returns a struct) | ||
| Value *ResRet = | ||
| Builder.CreateIntrinsic(RetTy, IntrID, Args, {}, "ld.struct"); | ||
|
|
||
| // Extract the loaded data (first element of the struct) | ||
| Value *LoadedValue = Builder.CreateExtractValue(ResRet, {0}, "ld.value"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd probably combine all of these per-statement comments into one that explains the sequence of operations at a higher level, something like:
// Load the `{value, status}` struct, store the status into the output
// argument, and return the value.
Value *ResRet = ...
Value *LoadedValue = ...
// ...
return LoadedValue;
We need a separate issue for SPIR-V regardless, since we don't have a SPIR-V implementation in this PR. I think we should reword #138910 to say "for DirectX", and update the title and description of #166954 to split out the SPIR-V specific parts. |
Done! |
clang/lib/CodeGen/CGHLSLBuiltins.cpp
Outdated
| // Call the intrinsic (returns a struct), | ||
| // Extract the loaded value and status bit (elements within the struct) | ||
| // Extend the status bit to a 32-bit integer | ||
| // Store the extended status into the user's reference variable | ||
| // Return the loaded value |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This comment just repeats what the code below says almost verbatim. The value of a comment here would be describing what the structure looks like or saying at a high level what we're doing here. Just listing the functions we're about to call isn't helpful.
I think the interesting thing to convey here is really "what does the intrinsic return and what do we need to do with it?", not "how do we extract values from some struct Value and zero-extend the result?".
Consider something like
The load intrinsics give us a `(T value, i1 status)` pair - shepherd these into the return value and out reference respectively.
Given this, hopefully the actual nuts and bolts of doing the operation are made obvious.
clang/lib/CodeGen/CGHLSLBuiltins.cpp
Outdated
| Builder.CreateIntrinsic(RetTy, IntrID, Args, {}, "ld.struct"); | ||
|
|
||
| Value *LoadedValue = Builder.CreateExtractValue(ResRet, {0}, "ld.value"); | ||
|
|
||
| Value *StatusBit = Builder.CreateExtractValue(ResRet, {1}, "ld.status"); | ||
|
|
||
| Value *ExtendedStatus = | ||
| Builder.CreateZExt(StatusBit, Builder.getInt32Ty(), "ld.status.ext"); | ||
|
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Probably helps readability to get rid of the empty lines here - this is a block of tightly related code.
| auto *II = dyn_cast<IntrinsicInst>(&I); | ||
| assert(II); | ||
| auto IID = II->getIntrinsicID(); | ||
| assert(IID == Intrinsic::dx_resource_load_typedbuffer || | ||
| IID == Intrinsic::dx_resource_load_rawbuffer); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If you change the signature of the function to take a const IntrinsicInst & instead of an Instruction you wouldn't need the extra cast and assert here. The callers already have an IntrinsicInst in the form of II anyway!
Also please include a message in the assert - it makes things easier to figure out without necessarily having to open up the source file at the line of the assert.
| assert(II && "intrinsic instruction expected in checkIfStatusIsExtracted"); | ||
| auto IID = II->getIntrinsicID(); | ||
| bool checkIfStatusIsExtracted(const IntrinsicInst &II) { | ||
| auto IID = II.getIntrinsicID(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Better not to use auto here. Also we probably need [[maybe_unused]] to avoid an unused variable warning in release builds.
This PR adds a new overload for resources, which takes an additional parameter by reference, status. It fills the status parameter with a 1 or 0, depending on whether or not the resource access was mapped. CheckAccessFullyMapped is also added as an intrinsic, and called in the production of this status bit.
Only addresses DXIL for the below issue:
#138910