From 51b62e3b28e18c5f4366b6fb4aeb1efef26f1b00 Mon Sep 17 00:00:00 2001 From: Razvan Lupusoru Date: Tue, 4 Nov 2025 16:05:53 -0800 Subject: [PATCH 1/8] [mlir][acc] Add ACCImplicitData pass for implicit data attributes This change adds the ACCImplicitData pass which implements the OpenACC specification for "Variables with Implicitly Determined Data Attributes" (OpenACC 3.4 spec, section 2.6.2). The pass automatically generates data clause operations (copyin, copyout, present, firstprivate, etc.) for variables used within OpenACC compute constructs (parallel, kernels, serial) that do not already have explicit data clauses. Key features: - Respects default(none) and default(present) clauses - Handles scalar vs. aggregate variables with different semantics: * Aggregates: present clause (if default(present)) or copy clause * Scalars: copy clause (kernels) or firstprivate (parallel/serial) - Generates privatization recipes when needed for firstprivate clauses - Performs alias analysis to avoid redundant data mappings - Ensures proper data clause ordering for partial entity access - Generates exit operations (copyout, delete) to match entry operations Requirements: - Types must implement acc::MappableType and/or acc::PointerLikeType interfaces to be considered candidates. - Operations accessing partial entities or creating subviews should implement acc::PartialEntityAccess and/or mlir::ViewLikeOpInterface for proper clause ordering. - Optionally pre-register acc::OpenACCSupport and mlir::AliasAnalysis if custom alias analysis, variable name determination, or error reporting is needed. --- .../mlir/Dialect/OpenACC/Transforms/Passes.h | 3 + .../mlir/Dialect/OpenACC/Transforms/Passes.td | 36 + .../OpenACC/Transforms/ACCImplicitData.cpp | 904 ++++++++++++++++++ .../Dialect/OpenACC/Transforms/CMakeLists.txt | 4 + .../OpenACC/acc-implicit-data-reduction.mlir | 109 +++ .../Dialect/OpenACC/acc-implicit-data.mlir | 224 +++++ 6 files changed, 1280 insertions(+) create mode 100644 mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp create mode 100644 mlir/test/Dialect/OpenACC/acc-implicit-data-reduction.mlir create mode 100644 mlir/test/Dialect/OpenACC/acc-implicit-data.mlir diff --git a/mlir/include/mlir/Dialect/OpenACC/Transforms/Passes.h b/mlir/include/mlir/Dialect/OpenACC/Transforms/Passes.h index 57d532b078b9e..27f65aa15f040 100644 --- a/mlir/include/mlir/Dialect/OpenACC/Transforms/Passes.h +++ b/mlir/include/mlir/Dialect/OpenACC/Transforms/Passes.h @@ -9,6 +9,9 @@ #ifndef MLIR_DIALECT_OPENACC_TRANSFORMS_PASSES_H #define MLIR_DIALECT_OPENACC_TRANSFORMS_PASSES_H +#include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" +#include "mlir/Dialect/OpenACC/OpenACC.h" #include "mlir/Pass/Pass.h" namespace mlir { diff --git a/mlir/include/mlir/Dialect/OpenACC/Transforms/Passes.td b/mlir/include/mlir/Dialect/OpenACC/Transforms/Passes.td index 9ceb91e5679a1..40ccd1fc6c1a0 100644 --- a/mlir/include/mlir/Dialect/OpenACC/Transforms/Passes.td +++ b/mlir/include/mlir/Dialect/OpenACC/Transforms/Passes.td @@ -27,4 +27,40 @@ def LegalizeDataValuesInRegion : Pass<"openacc-legalize-data-values", "mlir::fun ]; } +def ACCImplicitData : Pass<"acc-implicit-data", "mlir::ModuleOp"> { + let summary = "Generate implicit data attributes for OpenACC compute constructs"; + let description = [{ + This pass implements the OpenACC specification for "Variables with + Implicitly Determined Data Attributes" (OpenACC 3.4 spec, section 2.6.2). + + The pass automatically generates data clause operations for variables used + within OpenACC compute constructs (parallel, kernels, serial) that do not + already have explicit data clauses. The semantics follow these rules: + + 1. If there is a default(none) clause visible, no implicit data actions + apply. + + 2. An aggregate variable (arrays, derived types, etc.) will be treated as: + - In a present clause when default(present) is visible. + - In a copy clause otherwise. + + 3. A scalar variable will be treated as if it appears in: + - A copy clause if the compute construct is a kernels construct. + - A firstprivate clause otherwise (parallel, serial). + }]; + let dependentDialects = ["mlir::acc::OpenACCDialect", + "mlir::memref::MemRefDialect", + "mlir::arith::ArithDialect"]; + let options = [ + Option<"enableImplicitReductionCopy", "enable-implicit-reduction-copy", + "bool", "true", + "Enable applying implicit copy in lieu of implicit firstprivate for " + "reduction variables. This allows uniform treatment of reduction " + "variables between combined constructs (e.g., 'parallel loop') and " + "separate constructs (e.g., 'parallel' followed by 'loop'), where " + "the OpenACC spec requires copy semantics for the former but " + "firstprivate would normally apply for the latter."> + ]; +} + #endif // MLIR_DIALECT_OPENACC_TRANSFORMS_PASSES diff --git a/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp b/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp new file mode 100644 index 0000000000000..88de14950cd70 --- /dev/null +++ b/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp @@ -0,0 +1,904 @@ +//===- ACCImplicitData.cpp ------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass implements the OpenACC specification for "Variables with +// Implicitly Determined Data Attributes" (OpenACC 3.4 spec, section 2.6.2). +// +// Overview: +// --------- +// The pass automatically generates data clause operations for variables used +// within OpenACC compute constructs (parallel, kernels, serial) that do not +// already have explicit data clauses. The semantics follow these rules: +// +// 1. If there is a default(none) clause visible, no implicit data actions +// apply. +// +// 2. An aggregate variable (arrays, derived types, etc.) will be treated as: +// - In a present clause when default(present) is visible. +// - In a copy clause otherwise. +// +// 3. A scalar variable will be treated as if it appears in: +// - A copy clause if the compute construct is a kernels construct. +// - A firstprivate clause otherwise (parallel, serial). +// +// Requirements: +// ------------- +// To use this pass in a pipeline, the following requirements must be met: +// +// 1. Type Interface Implementation: Variables from the dialect being used +// must implement one or both of the following MLIR interfaces: +// `acc::MappableType` and/or `acc::PointerLikeType` +// +// These interfaces provide the necessary methods for the pass to: +// - Determine variable type categories (scalar vs. aggregate) +// - Generate appropriate bounds information +// - Generate privatization recipes +// +// 2. Operation Interface Implementation: Operations that access partial +// entities or create views should implement the following MLIR +// interfaces: `acc::PartialEntityAccess` and/or +// `mlir::ViewLikeOpInterface` +// +// These interfaces are used for proper data clause ordering, ensuring +// that base entities are mapped before derived entities (e.g., a +// struct is mapped before its fields, an array is mapped before +// subarray views). +// +// 3. Analysis Registration (Optional): If custom behavior is needed for +// variable name extraction or alias analysis, the dialect should +// pre-register the `acc::OpenACCSupport` and `mlir::AliasAnalysis` analyses. +// +// If not registered, default behavior will be used. +// +// Implementation Details: +// ----------------------- +// The pass performs the following operations: +// +// 1. Finds candidate variables which are live-in to the compute region and +// are not already in a data clause or private clause. +// +// 2. Generates both data "entry" and "exit" clause operations that match +// the intended action depending on variable type: +// - copy -> acc.copyin (entry) + acc.copyout (exit) +// - present -> acc.present (entry) + acc.delete (exit) +// - firstprivate -> acc.firstprivate (entry only, no exit) +// +// 3. Ensures that default clause is taken into consideration by looking +// through current construct and parent constructs to find the "visible +// default clause". +// +// 4. Fixes up SSA value links so that uses in the acc region reference the +// result of the newly created data clause operations. +// +// 5. When generating implicit data clause operations, it also adds variable +// name information and marks them with the implicit flag. +// +// 6. Recipes are generated by calling the appropriate entrypoints in the +// MappableType and PointerLikeType interfaces. +// +// 7. AliasAnalysis is used to determine if a variable is already covered by +// an existing data clause (e.g., an interior pointer covered by its parent). +// +// Examples: +// --------- +// +// Example 1: Scalar in parallel construct (implicit firstprivate) +// +// Before: +// func.func @test() { +// %scalar = memref.alloca() {acc.var_name = "x"} : memref +// acc.parallel { +// %val = memref.load %scalar[] : memref +// acc.yield +// } +// } +// +// After: +// func.func @test() { +// %scalar = memref.alloca() {acc.var_name = "x"} : memref +// %firstpriv = acc.firstprivate varPtr(%scalar : memref) +// -> memref {implicit = true, name = "x"} +// acc.parallel firstprivate(@recipe -> %firstpriv : memref) { +// %val = memref.load %firstpriv[] : memref +// acc.yield +// } +// } +// +// Example 2: Scalar in kernels construct (implicit copy) +// +// Before: +// func.func @test() { +// %scalar = memref.alloca() {acc.var_name = "n"} : memref +// acc.kernels { +// %val = memref.load %scalar[] : memref +// acc.terminator +// } +// } +// +// After: +// func.func @test() { +// %scalar = memref.alloca() {acc.var_name = "n"} : memref +// %copyin = acc.copyin varPtr(%scalar : memref) -> memref +// {dataClause = #acc, +// implicit = true, name = "n"} +// acc.kernels dataOperands(%copyin : memref) { +// %val = memref.load %copyin[] : memref +// acc.terminator +// } +// acc.copyout accPtr(%copyin : memref) +// to varPtr(%scalar : memref) +// {dataClause = #acc, +// implicit = true, name = "n"} +// } +// +// Example 3: Array (aggregate) in parallel (implicit copy) +// +// Before: +// func.func @test() { +// %array = memref.alloca() {acc.var_name = "arr"} : memref<100xf32> +// acc.parallel { +// %c0 = arith.constant 0 : index +// %val = memref.load %array[%c0] : memref<100xf32> +// acc.yield +// } +// } +// +// After: +// func.func @test() { +// %array = memref.alloca() {acc.var_name = "arr"} : memref<100xf32> +// %copyin = acc.copyin varPtr(%array : memref<100xf32>) +// -> memref<100xf32> +// {dataClause = #acc, +// implicit = true, name = "arr"} +// acc.parallel dataOperands(%copyin : memref<100xf32>) { +// %c0 = arith.constant 0 : index +// %val = memref.load %copyin[%c0] : memref<100xf32> +// acc.yield +// } +// acc.copyout accPtr(%copyin : memref<100xf32>) +// to varPtr(%array : memref<100xf32>) +// {dataClause = #acc, +// implicit = true, name = "arr"} +// } +// +// Example 4: Array with default(present) +// +// Before: +// func.func @test() { +// %array = memref.alloca() {acc.var_name = "arr"} : memref<100xf32> +// acc.parallel { +// %c0 = arith.constant 0 : index +// %val = memref.load %array[%c0] : memref<100xf32> +// acc.yield +// } attributes {defaultAttr = #acc} +// } +// +// After: +// func.func @test() { +// %array = memref.alloca() {acc.var_name = "arr"} : memref<100xf32> +// %present = acc.present varPtr(%array : memref<100xf32>) +// -> memref<100xf32> +// {implicit = true, name = "arr"} +// acc.parallel dataOperands(%present : memref<100xf32>) +// attributes {defaultAttr = #acc} { +// %c0 = arith.constant 0 : index +// %val = memref.load %present[%c0] : memref<100xf32> +// acc.yield +// } +// acc.delete accPtr(%present : memref<100xf32>) +// {dataClause = #acc, +// implicit = true, name = "arr"} +// } +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/OpenACC/Transforms/Passes.h" + +#include "mlir/Analysis/AliasAnalysis.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/OpenACC/Analysis/OpenACCSupport.h" +#include "mlir/Dialect/OpenACC/OpenACC.h" +#include "mlir/Dialect/OpenACC/OpenACCUtils.h" +#include "mlir/IR/Builders.h" +#include "mlir/IR/BuiltinOps.h" +#include "mlir/IR/Dominance.h" +#include "mlir/IR/Operation.h" +#include "mlir/IR/Value.h" +#include "mlir/Interfaces/ViewLikeInterface.h" +#include "mlir/Transforms/RegionUtils.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/TypeSwitch.h" +#include "llvm/Support/ErrorHandling.h" +#include + +namespace mlir { +namespace acc { +#define GEN_PASS_DEF_ACCIMPLICITDATA +#include "mlir/Dialect/OpenACC/Transforms/Passes.h.inc" +} // namespace acc +} // namespace mlir + +#define DEBUG_TYPE "acc-implicit-data" + +using namespace mlir; + +namespace { + +class ACCImplicitData + : public mlir::acc::impl::ACCImplicitDataBase { +public: + using mlir::acc::impl::ACCImplicitDataBase< + ACCImplicitData>::ACCImplicitDataBase; + + void runOnOperation() override; + +private: + /// Collects all data clauses that dominate the compute construct. + /// Needed to determine if a variable is already covered by an existing data + /// clause. + SmallVector + getDominatingDataClauses(Operation *computeConstructOp); + + /// Looks through the `dominatingDataClauses` to find the original data clause + /// op for an alias. Returns nullptr if no original data clause op is found. + template + Operation *getOriginalDataClauseOpForAlias( + Value var, OpBuilder &builder, OpT computeConstructOp, + const SmallVector &dominatingDataClauses); + + /// Generates the appropriate `acc.copyin`, `acc.present`,`acc.firstprivate`, + /// etc. data clause op for a candidate variable. + template + Operation *generateDataClauseOpForCandidate( + Value var, ModuleOp &module, OpBuilder &builder, OpT computeConstructOp, + const SmallVector &dominatingDataClauses, + const std::optional &defaultClause); + + /// Generates the implicit data ops for a compute construct. + template + void generateImplicitDataOps( + ModuleOp &module, OpT computeConstructOp, + std::optional &defaultClause); + + /// Generates a private recipe for a variable. + acc::PrivateRecipeOp generatePrivateRecipe(ModuleOp &module, mlir::Value var, + mlir::Location loc, + OpBuilder &builder, + acc::OpenACCSupport &accSupport); + + /// Generates a firstprivate recipe for a variable. + acc::FirstprivateRecipeOp + generateFirstprivateRecipe(ModuleOp &module, mlir::Value var, + mlir::Location loc, OpBuilder &builder, + acc::OpenACCSupport &accSupport); + + /// Generates recipes for a list of variables. + void generateRecipes(ModuleOp &module, OpBuilder &builder, + Operation *computeConstructOp, + const SmallVector &newOperands, + SmallVector &newRecipeSyms); +}; + +/// Determines if a variable is a candidate for implicit data mapping. +/// Returns true if the variable is a candidate, false otherwise. +static bool isCandidateForImplicitData(Value val, Region &accRegion) { + // Ensure the variable is an allowed type for data clause. + if (!acc::isPointerLikeType(val.getType()) && + !acc::isMappableType(val.getType())) { + return false; + } + + // If this is already coming from a data clause, we do not need to generate + // another. + if (isa_and_nonnull(val.getDefiningOp())) { + return false; + } + + // If this is only used by private clauses, it is not a real live-in. + if (acc::isOnlyUsedByPrivateClauses(val, accRegion)) { + return false; + } + + return true; +} + +SmallVector +ACCImplicitData::getDominatingDataClauses(Operation *computeConstructOp) { + llvm::SmallSetVector dominatingDataClauses; + + llvm::TypeSwitch(computeConstructOp) + .Case([&](auto op) { + for (auto dataClause : op.getDataClauseOperands()) { + dominatingDataClauses.insert(dataClause); + } + }) + .Default([](Operation *) {}); + + // Collect the data clauses from enclosing data constructs. + Operation *currParentOp = computeConstructOp->getParentOp(); + while (currParentOp) { + if (isa(currParentOp)) { + for (auto dataClause : + dyn_cast(currParentOp).getDataClauseOperands()) { + dominatingDataClauses.insert(dataClause); + } + } + currParentOp = currParentOp->getParentOp(); + } + + // Find the enclosing function/subroutine + Operation *funcOp = computeConstructOp->getParentOfType(); + + if (!funcOp) { + return dominatingDataClauses.takeVector(); + } + + // Walk the function to find `acc.declare_enter`/`acc.declare_exit` pairs that + // dominate and post-dominate the compute construct and add their data + // clauses to the list. + auto &domInfo = this->getAnalysis(); + auto &postDomInfo = this->getAnalysis(); + funcOp->walk([&](mlir::acc::DeclareEnterOp declareEnterOp) { + if (domInfo.dominates(declareEnterOp.getOperation(), computeConstructOp)) { + // Collect all `acc.declare_exit` ops for this token. + SmallVector exits; + for (auto *user : declareEnterOp.getToken().getUsers()) { + if (auto declareExit = dyn_cast(user)) { + exits.push_back(declareExit); + } + } + // Only add clauses if every `acc.declare_exit` op post-dominates the + // compute construct. + if (!exits.empty() && llvm::all_of(exits, [&](acc::DeclareExitOp exitOp) { + return postDomInfo.postDominates(exitOp, computeConstructOp); + })) { + for (auto dataClause : declareEnterOp.getDataClauseOperands()) { + dominatingDataClauses.insert(dataClause); + } + } + } + }); + + return dominatingDataClauses.takeVector(); +} + +template +Operation *ACCImplicitData::getOriginalDataClauseOpForAlias( + Value var, OpBuilder &builder, OpT computeConstructOp, + const SmallVector &dominatingDataClauses) { + auto &aliasAnalysis = this->getAnalysis(); + for (auto dataClause : dominatingDataClauses) { + if (auto *dataClauseOp = dataClause.getDefiningOp()) { + // Only accept clauses that guarantee that the alias is present. + if (isa(dataClauseOp)) { + if (aliasAnalysis.alias(acc::getVar(dataClauseOp), var).isMust()) { + return dataClauseOp; + } + } + } + } + return nullptr; +} + +// Generates bounds for variables that have unknown dimensions +static void fillInBoundsForUnknownDimensions(mlir::Operation *dataClauseOp, + OpBuilder &builder) { + + if (!mlir::acc::getBounds(dataClauseOp).empty()) + // If bounds are already present, do not overwrite them. + return; + + // For types that have unknown dimensions, attempt to generate bounds by + // relying on MappableType being able to extract it from the IR. + auto var = mlir::acc::getVar(dataClauseOp); + auto type = var.getType(); + if (auto mappableTy = dyn_cast(type)) { + if (mappableTy.hasUnknownDimensions()) { + TypeSwitch(dataClauseOp) + .Case([&](auto dataClauseOp) { + if (std::is_same_v) + return; + OpBuilder::InsertionGuard guard(builder); + builder.setInsertionPoint(dataClauseOp); + auto bounds = mappableTy.generateAccBounds(var, builder); + if (!bounds.empty()) + dataClauseOp.getBoundsMutable().assign(bounds); + }); + } + } +} + +acc::PrivateRecipeOp +ACCImplicitData::generatePrivateRecipe(ModuleOp &module, mlir::Value var, + mlir::Location loc, OpBuilder &builder, + acc::OpenACCSupport &accSupport) { + auto type = var.getType(); + std::string recipeName = + accSupport.getRecipeName(acc::RecipeKind::private_recipe, type, var); + + // Check if recipe already exists + auto existingRecipe = module.lookupSymbol(recipeName); + if (existingRecipe) + return existingRecipe; + + // Set insertion point to module body in a scoped way + OpBuilder::InsertionGuard guard(builder); + builder.setInsertionPointToStart(module.getBody()); + + auto recipe = mlir::acc::PrivateRecipeOp::createAndPopulate(builder, loc, + recipeName, type); + if (!recipe.has_value()) + return accSupport.emitNYI(loc, "implicit private"), nullptr; + return recipe.value(); +} + +acc::FirstprivateRecipeOp ACCImplicitData::generateFirstprivateRecipe( + ModuleOp &module, mlir::Value var, mlir::Location loc, OpBuilder &builder, + acc::OpenACCSupport &accSupport) { + auto type = var.getType(); + auto recipeName = + accSupport.getRecipeName(acc::RecipeKind::firstprivate_recipe, type, var); + + // Check if recipe already exists + auto existingRecipe = + module.lookupSymbol(recipeName); + if (existingRecipe) + return existingRecipe; + + // Set insertion point to module body in a scoped way + OpBuilder::InsertionGuard guard(builder); + builder.setInsertionPointToStart(module.getBody()); + + auto recipe = mlir::acc::FirstprivateRecipeOp::createAndPopulate( + builder, loc, recipeName, type); + if (!recipe.has_value()) + return accSupport.emitNYI(loc, "implicit firstprivate"), nullptr; + return recipe.value(); +} + +void ACCImplicitData::generateRecipes(ModuleOp &module, OpBuilder &builder, + Operation *computeConstructOp, + const SmallVector &newOperands, + SmallVector &newRecipeSyms) { + auto &accSupport = this->getAnalysis(); + for (auto var : newOperands) { + auto loc{var.getLoc()}; + if (isa(var.getDefiningOp())) { + auto recipe = generatePrivateRecipe( + module, acc::getVar(var.getDefiningOp()), loc, builder, accSupport); + if (recipe) + newRecipeSyms.push_back(mlir::SymbolRefAttr::get( + module->getContext(), recipe.getSymName().str())); + } else if (isa(var.getDefiningOp())) { + auto recipe = generateFirstprivateRecipe( + module, acc::getVar(var.getDefiningOp()), loc, builder, accSupport); + if (recipe) + newRecipeSyms.push_back(mlir::SymbolRefAttr::get( + module->getContext(), recipe.getSymName().str())); + } else { + accSupport.emitNYI(var.getLoc(), "implicit reduction"); + } + } +} + +// Generates the data entry data op clause so that it adheres to OpenACC +// rules as follows (line numbers and specification from OpenACC 3.4): +// 1388 An aggregate variable will be treated as if it appears either: +// 1389 - In a present clause if there is a default(present) clause visible at +// the compute construct. +// 1391 - In a copy clause otherwise. +// 1392 A scalar variable will be treated as if it appears either: +// 1393 - In a copy clause if the compute construct is a kernels construct. +// 1394 - In a firstprivate clause otherwise. +template +Operation *ACCImplicitData::generateDataClauseOpForCandidate( + Value var, ModuleOp &module, OpBuilder &builder, OpT computeConstructOp, + const SmallVector &dominatingDataClauses, + const std::optional &defaultClause) { + auto &accSupport = this->getAnalysis(); + acc::VariableTypeCategory typeCategory = + acc::VariableTypeCategory::uncategorized; + if (auto mappableTy = dyn_cast(var.getType())) { + typeCategory = mappableTy.getTypeCategory(var); + } else if (auto pointerLikeTy = + dyn_cast(var.getType())) { + typeCategory = pointerLikeTy.getPointeeTypeCategory( + cast>(var), + pointerLikeTy.getElementType()); + } + + bool isScalar = + acc::bitEnumContainsAny(typeCategory, acc::VariableTypeCategory::scalar); + bool isAnyAggregate = acc::bitEnumContainsAny( + typeCategory, acc::VariableTypeCategory::aggregate); + mlir::Location loc = computeConstructOp->getLoc(); + + Operation *op = nullptr; + op = getOriginalDataClauseOpForAlias(var, builder, computeConstructOp, + dominatingDataClauses); + if (op) { + if (isa(op)) { + return acc::NoCreateOp::create(builder, loc, var, + /*structured=*/true, /*implicit=*/true, + accSupport.getVariableName(var), + acc::getBounds(op)); + } + if (isa(op)) { + return acc::DevicePtrOp::create(builder, loc, var, + /*structured=*/true, /*implicit=*/true, + accSupport.getVariableName(var), + acc::getBounds(op)); + } + { + // The original data clause op is a PresentOp, CopyinOp, or CreateOp, + // hence guaranteed to be present. + return acc::PresentOp::create(builder, loc, var, + /*structured=*/true, /*implicit=*/true, + accSupport.getVariableName(var), + acc::getBounds(op)); + } + } else if (isScalar) { + if (enableImplicitReductionCopy && + acc::isOnlyUsedByReductionClauses(var, + computeConstructOp->getRegion(0))) { + auto copyinOp = + acc::CopyinOp::create(builder, loc, var, + /*structured=*/true, /*implicit=*/true, + accSupport.getVariableName(var)); + copyinOp.setDataClause(acc::DataClause::acc_reduction); + return copyinOp.getOperation(); + } + if constexpr (std::is_same_v || + std::is_same_v) { + // Scalars are implicit copyin in kernels construct. + // We also do the same for acc.kernel_environment because semantics + // of user variable mappings should be applied while ACC construct exists + // and at this point we should only be dealing with unmapped variables + // that were made live-in by the compiler. + // TODO: This may be revisited. + auto copyinOp = + acc::CopyinOp::create(builder, loc, var, + /*structured=*/true, /*implicit=*/true, + accSupport.getVariableName(var)); + copyinOp.setDataClause(acc::DataClause::acc_copy); + return copyinOp.getOperation(); + } else { + // Scalars are implicit firstprivate in parallel and serial construct. + return acc::FirstprivateOp::create(builder, loc, var, + /*structured=*/true, /*implicit=*/true, + accSupport.getVariableName(var)); + } + } else if (isAnyAggregate) { + Operation *newDataOp = nullptr; + + // When default(present) is true, the implicit behavior is present. + if (defaultClause.has_value() && + defaultClause.value() == acc::ClauseDefaultValue::Present) { + newDataOp = acc::PresentOp::create(builder, loc, var, + /*structured=*/true, /*implicit=*/true, + accSupport.getVariableName(var)); + } else { + SmallVector bounds; + auto copyinOp = + acc::CopyinOp::create(builder, loc, var, + /*structured=*/true, /*implicit=*/true, + accSupport.getVariableName(var)); + copyinOp.setDataClause(acc::DataClause::acc_copy); + newDataOp = copyinOp.getOperation(); + } + + return newDataOp; + } else { + // This is not a fatal error - for example when the element type is + // pointer type (aka we have a pointer of pointer), it is potentially a + // deep copy scenario which is not being handled here. + // Other types need to be canonicalized. Thus just log unhandled cases. + LLVM_DEBUG(llvm::dbgs() + << "Unhandled case for implicit data mapping " << var << "\n"); + } + return nullptr; +} + +// Ensures that result values from the acc data clause ops are used inside the +// acc region. ie: +// acc.kernels { +// use %val +// } +// => +// %dev = acc.dataop %val +// acc.kernels { +// use %dev +// } +static void legalizeValuesInRegion(Region &accRegion, + SmallVector &newPrivateOperands, + SmallVector &newDataClauseOperands) { + for (Value dataClause : + llvm::concat(newDataClauseOperands, newPrivateOperands)) { + Value var = acc::getVar(dataClause.getDefiningOp()); + replaceAllUsesInRegionWith(var, dataClause, accRegion); + } +} + +// Adds the private operands and private recipes to the data construct +// operation in a valid way (ensures that the index in the privatizationRecipes +// array matches the position of the private operand). +template +static void +addNewPrivateOperands(OpT &accOp, mlir::SmallVector &privateOperands, + mlir::SmallVector &privateRecipeSyms) { + assert(privateOperands.size() == privateRecipeSyms.size()); + if (privateOperands.empty()) + return; + + mlir::SmallVector completePrivateRecipesSyms; + mlir::SmallVector completeFirstprivateRecipesSyms; + mlir::SmallVector newPrivateOperands; + mlir::SmallVector newFirstprivateOperands; + + // Collect all of the existing recipes since they are held in an attribute. + // To add to it, we need to create a brand new one. + if (accOp.getPrivatizationRecipes().has_value()) { + for (auto privatization : accOp.getPrivatizationRecipesAttr()) { + completePrivateRecipesSyms.push_back(privatization); + } + } + if (accOp.getFirstprivatizationRecipes().has_value()) { + for (auto privatization : accOp.getFirstprivatizationRecipesAttr()) { + completeFirstprivateRecipesSyms.push_back(privatization); + } + } + + // Now separate between private and firstprivate operands. + for (auto [priv, privateRecipeSym] : + llvm::zip(privateOperands, privateRecipeSyms)) { + if (isa(priv.getDefiningOp())) { + newPrivateOperands.push_back(priv); + completePrivateRecipesSyms.push_back(privateRecipeSym); + } else if (isa(priv.getDefiningOp())) { + newFirstprivateOperands.push_back(priv); + completeFirstprivateRecipesSyms.push_back(privateRecipeSym); + } else { + llvm_unreachable("unhandled private operand"); + } + } + + // Append all of the new private operands to their appropriate list. + accOp.getPrivateOperandsMutable().append(newPrivateOperands); + accOp.getFirstprivateOperandsMutable().append(newFirstprivateOperands); + + // Update the privatizationRecipes attributes to hold all of the new recipes. + if (!completePrivateRecipesSyms.empty()) { + accOp.setPrivatizationRecipesAttr( + mlir::ArrayAttr::get(accOp.getContext(), completePrivateRecipesSyms)); + } + if (!completeFirstprivateRecipesSyms.empty()) { + accOp.setFirstprivatizationRecipesAttr(mlir::ArrayAttr::get( + accOp.getContext(), completeFirstprivateRecipesSyms)); + } +} + +static mlir::Operation *findDataExitOp(mlir::Operation *dataEntryOp) { + auto res = acc::getAccVar(dataEntryOp); + for (auto *user : res.getUsers()) + if (isa(user)) + return user; + return nullptr; +} + +// Generates matching data exit operation as described in the acc dialect +// for how data clauses are decomposed: +// https://mlir.llvm.org/docs/Dialects/OpenACCDialect/#operation-categories +// Key ones used here: +// * acc {construct} copy -> acc.copyin (before region) + acc.copyout (after +// region) +// * acc {construct} present -> acc.present (before region) + acc.delete +// (after region) +static void +generateDataExitOperations(OpBuilder &builder, Operation *accOp, + mlir::SmallVector &newDataClauseOperands, + mlir::SmallVector &sortedDataClauseOperands) { + builder.setInsertionPointAfter(accOp); + Value lastDataClause = nullptr; + for (auto dataEntry : llvm::reverse(sortedDataClauseOperands)) { + if (llvm::find(newDataClauseOperands, dataEntry) == + newDataClauseOperands.end()) { + // If this is not a new data clause operand, we should not generate an + // exit operation for it. + lastDataClause = dataEntry; + continue; + } + if (lastDataClause) + if (auto *dataExitOp = findDataExitOp(lastDataClause.getDefiningOp())) + builder.setInsertionPointAfter(dataExitOp); + Operation *dataEntryOp = dataEntry.getDefiningOp(); + if (isa(dataEntryOp)) { + auto copyoutOp = acc::CopyoutOp::create( + builder, dataEntryOp->getLoc(), dataEntry, acc::getVar(dataEntryOp), + /*structured=*/true, /*implicit=*/true, + acc::getVarName(dataEntryOp).value(), acc::getBounds(dataEntryOp)); + copyoutOp.setDataClause(acc::DataClause::acc_copy); + } else if (isa(dataEntryOp)) { + auto deleteOp = acc::DeleteOp::create( + builder, dataEntryOp->getLoc(), dataEntry, + /*structured=*/true, /*implicit=*/true, + acc::getVarName(dataEntryOp).value(), acc::getBounds(dataEntryOp)); + deleteOp.setDataClause(acc::getDataClause(dataEntryOp).value()); + } else if (isa(dataEntryOp)) { + // Do nothing. + } else { + llvm_unreachable("unhandled data exit"); + } + lastDataClause = dataEntry; + } +} + +/// Returns all base references of a value in order. +/// So for example, if we have a reference to a struct field like +/// s.f1.f2.f3, this will return . +/// Any intermediate casts/view-like operations are included in the +/// chain as well. +static llvm::SmallVector getBaseRefsChain(Value val) { + llvm::SmallVector baseRefs; + baseRefs.push_back(val); + while (true) { + Value prevVal = val; + + val = mlir::acc::getBaseEntity(val); + if (val != baseRefs.front()) + baseRefs.insert(baseRefs.begin(), val); + + // If this is a view-like operation, it is effectively another + // view of the same entity so we should add it to the chain also. + if (auto viewLikeOp = val.getDefiningOp()) { + val = viewLikeOp.getViewSource(); + baseRefs.insert(baseRefs.begin(), val); + } + + // Continue loop if we made any progress + if (val == prevVal) + break; + } + + return baseRefs; +} + +static void +insertInSortedOrder(llvm::SmallVector &sortedDataClauseOperands, + Operation *newClause) { + auto *insertPos = + std::find_if(sortedDataClauseOperands.begin(), + sortedDataClauseOperands.end(), [&](Value dataClauseVal) { + // Get the base refs for the current clause we are looking + // at. + auto var = acc::getVar(dataClauseVal.getDefiningOp()); + auto baseRefs = getBaseRefsChain(var); + + // If the newClause is of a base ref of an existing clause, + // we should insert it right before the current clause. + // Thus return true to stop iteration when this is the + // case. + return std::find(baseRefs.begin(), baseRefs.end(), + acc::getVar(newClause)) != baseRefs.end(); + }); + + if (insertPos != sortedDataClauseOperands.end()) { + newClause->moveBefore(insertPos->getDefiningOp()); + sortedDataClauseOperands.insert(insertPos, acc::getAccVar(newClause)); + } else { + sortedDataClauseOperands.push_back(acc::getAccVar(newClause)); + } +} + +template +void ACCImplicitData::generateImplicitDataOps( + ModuleOp &module, OpT computeConstructOp, + std::optional &defaultClause) { + // Implicit data attributes are only applied if "[t]here is no default(none) + // clause visible at the compute construct." + if (defaultClause.has_value() && + defaultClause.value() == acc::ClauseDefaultValue::None) { + return; + } + assert(!defaultClause.has_value() || + defaultClause.value() == acc::ClauseDefaultValue::Present); + + // 1) Collect live-in values. + Region &accRegion = computeConstructOp->getRegion(0); + SetVector liveInValues; + getUsedValuesDefinedAbove(accRegion, liveInValues); + + // 2) Run the filtering to find relevant pointers that need copied. + auto isCandidate{[&](Value val) -> bool { + return isCandidateForImplicitData(val, accRegion); + }}; + auto candidateVars( + llvm::to_vector(llvm::make_filter_range(liveInValues, isCandidate))); + if (candidateVars.empty()) + return; + + // 3) Generate data clauses for the variables. + SmallVector newPrivateOperands; + SmallVector newDataClauseOperands; + OpBuilder builder(computeConstructOp); + if (!candidateVars.empty()) { + LLVM_DEBUG(llvm::dbgs() << "== Generating clauses for ==\n" + << computeConstructOp << "\n"); + } + auto dominatingDataClauses = getDominatingDataClauses(computeConstructOp); + for (auto var : candidateVars) { + auto newDataClauseOp = generateDataClauseOpForCandidate( + var, module, builder, computeConstructOp, dominatingDataClauses, + defaultClause); + fillInBoundsForUnknownDimensions(newDataClauseOp, builder); + LLVM_DEBUG(llvm::dbgs() << "Generated data clause for " << var << ":\n" + << "\t" << *newDataClauseOp << "\n"); + if (isa_and_nonnull( + newDataClauseOp)) { + newPrivateOperands.push_back(acc::getAccVar(newDataClauseOp)); + } else if (isa_and_nonnull(newDataClauseOp)) { + newDataClauseOperands.push_back(acc::getAccVar(newDataClauseOp)); + dominatingDataClauses.push_back(acc::getAccVar(newDataClauseOp)); + } + } + + // 4) Legalize values in region (aka the uses in the region are the result + // of the data clause ops) + legalizeValuesInRegion(accRegion, newPrivateOperands, newDataClauseOperands); + + SmallVector newPrivateRecipeSyms; + // 5) Generate private recipes which are required for properly attaching + // private operands. + if constexpr (!std::is_same_v && + !std::is_same_v) { + generateRecipes(module, builder, computeConstructOp, newPrivateOperands, + newPrivateRecipeSyms); + } + + // 6) Figure out insertion order for the new data clause operands. + llvm::SmallVector sortedDataClauseOperands( + computeConstructOp.getDataClauseOperands()); + for (auto newClause : newDataClauseOperands) { + insertInSortedOrder(sortedDataClauseOperands, newClause.getDefiningOp()); + } + + // 7) Generate the data exit operations. + generateDataExitOperations(builder, computeConstructOp, newDataClauseOperands, + sortedDataClauseOperands); + + // 8) Add all of the new operands to the compute construct op. + assert(newPrivateOperands.size() == newPrivateRecipeSyms.size() && + "sizes must match"); + if constexpr (!std::is_same_v && + !std::is_same_v) { + addNewPrivateOperands(computeConstructOp, newPrivateOperands, + newPrivateRecipeSyms); + } + computeConstructOp.getDataClauseOperandsMutable().assign( + sortedDataClauseOperands); +} + +void ACCImplicitData::runOnOperation() { + ModuleOp module = this->getOperation(); + module.walk([&](Operation *op) { + if (isa(op)) { + assert(op->getNumRegions() == 1 && "must have 1 region"); + + auto defaultClause = acc::getDefaultAttr(op); + llvm::TypeSwitch(op) + .Case( + [&](auto op) { + generateImplicitDataOps(module, op, defaultClause); + }) + .Default([&](Operation *) {}); + } + }); +} + +} // namespace diff --git a/mlir/lib/Dialect/OpenACC/Transforms/CMakeLists.txt b/mlir/lib/Dialect/OpenACC/Transforms/CMakeLists.txt index 7d934956089a5..f8fff5958f8c7 100644 --- a/mlir/lib/Dialect/OpenACC/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/OpenACC/Transforms/CMakeLists.txt @@ -1,4 +1,5 @@ add_mlir_dialect_library(MLIROpenACCTransforms + ACCImplicitData.cpp LegalizeDataValues.cpp ADDITIONAL_HEADER_DIRS @@ -14,7 +15,10 @@ add_mlir_dialect_library(MLIROpenACCTransforms MLIROpenACCTypeInterfacesIncGen LINK_LIBS PUBLIC + MLIRAnalysis + MLIROpenACCAnalysis MLIROpenACCDialect + MLIROpenACCUtils MLIRFuncDialect MLIRIR MLIRPass diff --git a/mlir/test/Dialect/OpenACC/acc-implicit-data-reduction.mlir b/mlir/test/Dialect/OpenACC/acc-implicit-data-reduction.mlir new file mode 100644 index 0000000000000..cff118b79aec2 --- /dev/null +++ b/mlir/test/Dialect/OpenACC/acc-implicit-data-reduction.mlir @@ -0,0 +1,109 @@ +// RUN: mlir-opt %s -acc-implicit-data=enable-implicit-reduction-copy=true -split-input-file | FileCheck %s --check-prefix=COPY +// RUN: mlir-opt %s -acc-implicit-data=enable-implicit-reduction-copy=false -split-input-file | FileCheck %s --check-prefix=FIRSTPRIVATE + +// Test case: scalar reduction variable in parallel loop +// When enable-implicit-reduction-copy=true: expect copyin/copyout for reduction variable +// When enable-implicit-reduction-copy=false: expect firstprivate for reduction variable + +acc.reduction.recipe @reduction_add_memref_i32 : memref reduction_operator init { +^bb0(%arg0: memref): + %c0_i32 = arith.constant 0 : i32 + %alloc = memref.alloca() : memref + memref.store %c0_i32, %alloc[] : memref + acc.yield %alloc : memref +} combiner { +^bb0(%arg0: memref, %arg1: memref): + %0 = memref.load %arg0[] : memref + %1 = memref.load %arg1[] : memref + %2 = arith.addi %0, %1 : i32 + memref.store %2, %arg0[] : memref + acc.yield %arg0 : memref +} + +func.func @test_reduction_implicit_copy() { + %c1_i32 = arith.constant 1 : i32 + %c100_i32 = arith.constant 100 : i32 + %c0_i32 = arith.constant 0 : i32 + %r = memref.alloca() : memref + memref.store %c0_i32, %r[] : memref + + acc.parallel { + %red_var = acc.reduction varPtr(%r : memref) -> memref {name = "r"} + acc.loop reduction(@reduction_add_memref_i32 -> %red_var : memref) control(%iv : i32) = (%c1_i32 : i32) to (%c100_i32 : i32) step (%c1_i32 : i32) { + %load = memref.load %red_var[] : memref + %add = arith.addi %load, %c1_i32 : i32 + memref.store %add, %red_var[] : memref + acc.yield + } attributes {independent = [#acc.device_type]} + acc.yield + } + return +} + +// When enable-implicit-reduction-copy=true: expect copyin/copyout for reduction variable +// COPY-LABEL: func.func @test_reduction_implicit_copy +// COPY: %[[COPYIN:.*]] = acc.copyin varPtr({{.*}} : memref) -> memref {dataClause = #acc, implicit = true, name = ""} +// COPY: acc.copyout accPtr(%[[COPYIN]] : memref) to varPtr({{.*}} : memref) {dataClause = #acc, implicit = true, name = ""} + +// When enable-implicit-reduction-copy=false: expect firstprivate for reduction variable +// FIRSTPRIVATE-LABEL: func.func @test_reduction_implicit_copy +// FIRSTPRIVATE: acc.firstprivate varPtr({{.*}} : memref) -> memref {implicit = true, name = ""} +// FIRSTPRIVATE-NOT: acc.copyin +// FIRSTPRIVATE-NOT: acc.copyout + +// ----- + +// Test case: reduction variable used both in loop and outside +// Should be firstprivate regardless of the flag setting + +acc.reduction.recipe @reduction_add_memref_i32_2 : memref reduction_operator init { +^bb0(%arg0: memref): + %c0_i32 = arith.constant 0 : i32 + %alloc = memref.alloca() : memref + memref.store %c0_i32, %alloc[] : memref + acc.yield %alloc : memref +} combiner { +^bb0(%arg0: memref, %arg1: memref): + %0 = memref.load %arg0[] : memref + %1 = memref.load %arg1[] : memref + %2 = arith.addi %0, %1 : i32 + memref.store %2, %arg0[] : memref + acc.yield %arg0 : memref +} + +func.func @test_reduction_with_usage_outside_loop() { + %c1_i32 = arith.constant 1 : i32 + %c100_i32 = arith.constant 100 : i32 + %c0_i32 = arith.constant 0 : i32 + %r = memref.alloca() : memref + %out = memref.alloca() : memref + memref.store %c0_i32, %r[] : memref + + %out_create = acc.create varPtr(%out : memref) -> memref {dataClause = #acc, name = "out"} + acc.parallel dataOperands(%out_create : memref) { + %red_var = acc.reduction varPtr(%r : memref) -> memref {name = "r"} + acc.loop reduction(@reduction_add_memref_i32_2 -> %red_var : memref) control(%iv : i32) = (%c1_i32 : i32) to (%c100_i32 : i32) step (%c1_i32 : i32) { + %load = memref.load %red_var[] : memref + %add = arith.addi %load, %c1_i32 : i32 + memref.store %add, %red_var[] : memref + acc.yield + } attributes {independent = [#acc.device_type]} + // out = r (usage of r outside the loop) + %final_r = memref.load %r[] : memref + memref.store %final_r, %out_create[] : memref + acc.yield + } + acc.copyout accPtr(%out_create : memref) to varPtr(%out : memref) {dataClause = #acc, name = "out"} + return +} + +// In this case, r should be firstprivate regardless of the flag setting +// because it's used outside the reduction context +// COPY-LABEL: func.func @test_reduction_with_usage_outside_loop +// COPY: acc.firstprivate varPtr({{.*}} : memref) -> memref {implicit = true, name = ""} +// COPY-NOT: acc.copyin varPtr({{.*}} : memref) -> memref {{.*}} name = "" + +// FIRSTPRIVATE-LABEL: func.func @test_reduction_with_usage_outside_loop +// FIRSTPRIVATE: acc.firstprivate varPtr({{.*}} : memref) -> memref {implicit = true, name = ""} +// FIRSTPRIVATE-NOT: acc.copyin varPtr({{.*}} : memref) -> memref {{.*}} name = "" + diff --git a/mlir/test/Dialect/OpenACC/acc-implicit-data.mlir b/mlir/test/Dialect/OpenACC/acc-implicit-data.mlir new file mode 100644 index 0000000000000..cf09c33ca5197 --- /dev/null +++ b/mlir/test/Dialect/OpenACC/acc-implicit-data.mlir @@ -0,0 +1,224 @@ +// RUN: mlir-opt %s -acc-implicit-data -split-input-file | FileCheck %s + +// ----- + +// Test scalar in serial construct - should generate firstprivate +func.func @test_scalar_in_serial() { + %alloc = memref.alloca() : memref + acc.serial { + %load = memref.load %alloc[] : memref + acc.yield + } + return +} + +// CHECK-LABEL: func.func @test_scalar_in_serial +// CHECK: acc.firstprivate varPtr({{.*}} : memref) -> memref {implicit = true, name = ""} + +// ----- + +// Test scalar in parallel construct - should generate firstprivate +func.func @test_scalar_in_parallel() { + %alloc = memref.alloca() : memref + acc.parallel { + %load = memref.load %alloc[] : memref + acc.yield + } + return +} + +// CHECK-LABEL: func.func @test_scalar_in_parallel +// CHECK: acc.firstprivate varPtr({{.*}} : memref) -> memref {implicit = true, name = ""} + +// ----- + +// Test scalar in kernels construct - should generate copyin/copyout +func.func @test_scalar_in_kernels() { + %alloc = memref.alloca() : memref + acc.kernels { + %load = memref.load %alloc[] : memref + acc.terminator + } + return +} + +// CHECK-LABEL: func.func @test_scalar_in_kernels +// CHECK: %[[COPYIN:.*]] = acc.copyin varPtr({{.*}} : memref) -> memref {dataClause = #acc, implicit = true, name = ""} +// CHECK: acc.copyout accPtr(%[[COPYIN]] : memref) to varPtr({{.*}} : memref) {dataClause = #acc, implicit = true, name = ""} + +// ----- + +// Test scalar in parallel with default(none) - should NOT generate implicit data +func.func @test_scalar_parallel_defaultnone() { + %alloc = memref.alloca() : memref + acc.parallel { + %load = memref.load %alloc[] : memref + acc.yield + } attributes {defaultAttr = #acc} + return +} + +// CHECK-LABEL: func.func @test_scalar_parallel_defaultnone +// CHECK-NOT: acc.firstprivate +// CHECK-NOT: acc.copyin + +// ----- + +// Test array in parallel - should generate copyin/copyout +func.func @test_array_in_parallel() { + %alloc = memref.alloca() : memref<10xf32> + acc.parallel { + %c0 = arith.constant 0 : index + %load = memref.load %alloc[%c0] : memref<10xf32> + acc.yield + } + return +} + +// CHECK-LABEL: func.func @test_array_in_parallel +// CHECK: %[[COPYIN:.*]] = acc.copyin varPtr({{.*}} : memref<10xf32>) -> memref<10xf32> {dataClause = #acc, implicit = true, name = ""} +// CHECK: acc.copyout accPtr(%[[COPYIN]] : memref<10xf32>) to varPtr({{.*}} : memref<10xf32>) {dataClause = #acc, implicit = true, name = ""} + +// ----- + +// Test array in kernels - should generate copyin/copyout +func.func @test_array_in_kernels() { + %alloc = memref.alloca() : memref<20xi32> + acc.kernels { + %c0 = arith.constant 0 : index + %load = memref.load %alloc[%c0] : memref<20xi32> + acc.terminator + } + return +} + +// CHECK-LABEL: func.func @test_array_in_kernels +// CHECK: %[[COPYIN:.*]] = acc.copyin varPtr({{.*}} : memref<20xi32>) -> memref<20xi32> {dataClause = #acc, implicit = true, name = ""} +// CHECK: acc.copyout accPtr(%[[COPYIN]] : memref<20xi32>) to varPtr({{.*}} : memref<20xi32>) {dataClause = #acc, implicit = true, name = ""} + +// ----- + +// Test array with default(present) - should generate present +func.func @test_array_parallel_defaultpresent() { + %alloc = memref.alloca() : memref<10xf32> + acc.parallel { + %c0 = arith.constant 0 : index + %load = memref.load %alloc[%c0] : memref<10xf32> + acc.yield + } attributes {defaultAttr = #acc} + return +} + +// CHECK-LABEL: func.func @test_array_parallel_defaultpresent +// CHECK: %[[PRESENT:.*]] = acc.present varPtr({{.*}} : memref<10xf32>) -> memref<10xf32> {implicit = true, name = ""} +// CHECK: acc.delete accPtr(%[[PRESENT]] : memref<10xf32>) {dataClause = #acc, implicit = true, name = ""} + +// ----- + +// Test scalar with default(present) - should still generate firstprivate (scalars ignore default(present)) +func.func @test_scalar_parallel_defaultpresent() { + %alloc = memref.alloca() : memref + acc.parallel { + %load = memref.load %alloc[] : memref + acc.yield + } attributes {defaultAttr = #acc} + return +} + +// CHECK-LABEL: func.func @test_scalar_parallel_defaultpresent +// CHECK: acc.firstprivate varPtr({{.*}} : memref) -> memref {implicit = true, name = ""} + +// ----- + +// Test multidimensional array +func.func @test_multidim_array_in_parallel() { + %alloc = memref.alloca() : memref<8x16xf32> + acc.parallel { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %load = memref.load %alloc[%c0, %c1] : memref<8x16xf32> + acc.yield + } + return +} + +// CHECK-LABEL: func.func @test_multidim_array_in_parallel +// CHECK: %[[COPYIN:.*]] = acc.copyin varPtr({{.*}} : memref<8x16xf32>) -> memref<8x16xf32> {dataClause = #acc, implicit = true, name = ""} +// CHECK: acc.copyout accPtr(%[[COPYIN]] : memref<8x16xf32>) to varPtr({{.*}} : memref<8x16xf32>) {dataClause = #acc, implicit = true, name = ""} + +// ----- + +// Test dynamic size array +func.func @test_dynamic_array(%size: index) { + %alloc = memref.alloca(%size) : memref + acc.parallel { + %c0 = arith.constant 0 : index + %load = memref.load %alloc[%c0] : memref + acc.yield + } + return +} + +// CHECK-LABEL: func.func @test_dynamic_array +// CHECK: %[[COPYIN:.*]] = acc.copyin varPtr({{.*}} : memref) -> memref {dataClause = #acc, implicit = true, name = ""} +// CHECK: acc.copyout accPtr(%[[COPYIN]] : memref) to varPtr({{.*}} : memref) {dataClause = #acc, implicit = true, name = ""} + +// ----- + +// Test variable with explicit data clause - implicit should recognize it +func.func @test_with_explicit_copyin() { + %alloc = memref.alloca() : memref<100xf32> + %copyin = acc.copyin varPtr(%alloc : memref<100xf32>) -> memref<100xf32> {name = "explicit"} + acc.parallel dataOperands(%copyin : memref<100xf32>) { + %c0 = arith.constant 0 : index + %load = memref.load %alloc[%c0] : memref<100xf32> + acc.yield + } + acc.copyout accPtr(%copyin : memref<100xf32>) to varPtr(%alloc : memref<100xf32>) {name = "explicit"} + return +} + +// CHECK-LABEL: func.func @test_with_explicit_copyin +// CHECK: acc.present varPtr({{.*}} : memref<100xf32>) -> memref<100xf32> {implicit = true, name = ""} + +// ----- + +// Test multiple variables +func.func @test_multiple_variables() { + %alloc1 = memref.alloca() : memref + %alloc2 = memref.alloca() : memref<10xi32> + acc.parallel { + %load1 = memref.load %alloc1[] : memref + %c0 = arith.constant 0 : index + %load2 = memref.load %alloc2[%c0] : memref<10xi32> + acc.yield + } + return +} + +// CHECK-LABEL: func.func @test_multiple_variables +// CHECK: acc.firstprivate varPtr({{.*}} : memref) -> memref {implicit = true, name = ""} +// CHECK: %[[COPYIN:.*]] = acc.copyin varPtr({{.*}} : memref<10xi32>) -> memref<10xi32> {dataClause = #acc, implicit = true, name = ""} +// CHECK: acc.copyout accPtr(%[[COPYIN]] : memref<10xi32>) to varPtr({{.*}} : memref<10xi32>) {dataClause = #acc, implicit = true, name = ""} + +// ----- + +// Test memref.view aliasing - view of explicitly copied buffer should generate present +func.func @test_memref_view(%size: index) { + %c0 = arith.constant 0 : index + %buffer = memref.alloca(%size) : memref + %copyin = acc.copyin varPtr(%buffer : memref) -> memref {name = "buffer"} + %view = memref.view %buffer[%c0][] : memref to memref<8x64xf32> + acc.kernels dataOperands(%copyin : memref) { + %c0_0 = arith.constant 0 : index + %c0_1 = arith.constant 0 : index + %load = memref.load %view[%c0_0, %c0_1] : memref<8x64xf32> + acc.terminator + } + acc.copyout accPtr(%copyin : memref) to varPtr(%buffer : memref) {name = "buffer"} + return +} + +// CHECK-LABEL: func.func @test_memref_view +// CHECK: acc.present varPtr({{.*}} : memref<8x64xf32>) -> memref<8x64xf32> {implicit = true, name = ""} + From 0cc63c174dbd64650231369fe858078eb7c7ecb4 Mon Sep 17 00:00:00 2001 From: Razvan Lupusoru Date: Tue, 4 Nov 2025 17:12:24 -0800 Subject: [PATCH 2/8] Fix braces and use named type --- .../OpenACC/Transforms/ACCImplicitData.cpp | 62 +++++++------------ 1 file changed, 22 insertions(+), 40 deletions(-) diff --git a/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp b/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp index 88de14950cd70..3547aa8f51e1c 100644 --- a/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp +++ b/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp @@ -290,20 +290,17 @@ class ACCImplicitData static bool isCandidateForImplicitData(Value val, Region &accRegion) { // Ensure the variable is an allowed type for data clause. if (!acc::isPointerLikeType(val.getType()) && - !acc::isMappableType(val.getType())) { + !acc::isMappableType(val.getType())) return false; - } // If this is already coming from a data clause, we do not need to generate // another. - if (isa_and_nonnull(val.getDefiningOp())) { + if (isa_and_nonnull(val.getDefiningOp())) return false; - } // If this is only used by private clauses, it is not a real live-in. - if (acc::isOnlyUsedByPrivateClauses(val, accRegion)) { + if (acc::isOnlyUsedByPrivateClauses(val, accRegion)) return false; - } return true; } @@ -334,10 +331,8 @@ ACCImplicitData::getDominatingDataClauses(Operation *computeConstructOp) { // Find the enclosing function/subroutine Operation *funcOp = computeConstructOp->getParentOfType(); - - if (!funcOp) { + if (!funcOp) return dominatingDataClauses.takeVector(); - } // Walk the function to find `acc.declare_enter`/`acc.declare_exit` pairs that // dominate and post-dominate the compute construct and add their data @@ -348,19 +343,17 @@ ACCImplicitData::getDominatingDataClauses(Operation *computeConstructOp) { if (domInfo.dominates(declareEnterOp.getOperation(), computeConstructOp)) { // Collect all `acc.declare_exit` ops for this token. SmallVector exits; - for (auto *user : declareEnterOp.getToken().getUsers()) { - if (auto declareExit = dyn_cast(user)) { + for (auto *user : declareEnterOp.getToken().getUsers()) + if (auto declareExit = dyn_cast(user)) exits.push_back(declareExit); - } - } + // Only add clauses if every `acc.declare_exit` op post-dominates the // compute construct. if (!exits.empty() && llvm::all_of(exits, [&](acc::DeclareExitOp exitOp) { return postDomInfo.postDominates(exitOp, computeConstructOp); })) { - for (auto dataClause : declareEnterOp.getDataClauseOperands()) { + for (auto dataClause : declareEnterOp.getDataClauseOperands()) dominatingDataClauses.insert(dataClause); - } } } }); @@ -377,11 +370,9 @@ Operation *ACCImplicitData::getOriginalDataClauseOpForAlias( if (auto *dataClauseOp = dataClause.getDefiningOp()) { // Only accept clauses that guarantee that the alias is present. if (isa(dataClauseOp)) { - if (aliasAnalysis.alias(acc::getVar(dataClauseOp), var).isMust()) { + acc::DevicePtrOp>(dataClauseOp)) + if (aliasAnalysis.alias(acc::getVar(dataClauseOp), var).isMust()) return dataClauseOp; - } - } } } return nullptr; @@ -443,7 +434,7 @@ acc::FirstprivateRecipeOp ACCImplicitData::generateFirstprivateRecipe( ModuleOp &module, mlir::Value var, mlir::Location loc, OpBuilder &builder, acc::OpenACCSupport &accSupport) { auto type = var.getType(); - auto recipeName = + std::string recipeName = accSupport.getRecipeName(acc::RecipeKind::firstprivate_recipe, type, var); // Check if recipe already exists @@ -644,16 +635,12 @@ addNewPrivateOperands(OpT &accOp, mlir::SmallVector &privateOperands, // Collect all of the existing recipes since they are held in an attribute. // To add to it, we need to create a brand new one. - if (accOp.getPrivatizationRecipes().has_value()) { - for (auto privatization : accOp.getPrivatizationRecipesAttr()) { + if (accOp.getPrivatizationRecipes().has_value()) + for (auto privatization : accOp.getPrivatizationRecipesAttr()) completePrivateRecipesSyms.push_back(privatization); - } - } - if (accOp.getFirstprivatizationRecipes().has_value()) { - for (auto privatization : accOp.getFirstprivatizationRecipesAttr()) { + if (accOp.getFirstprivatizationRecipes().has_value()) + for (auto privatization : accOp.getFirstprivatizationRecipesAttr()) completeFirstprivateRecipesSyms.push_back(privatization); - } - } // Now separate between private and firstprivate operands. for (auto [priv, privateRecipeSym] : @@ -674,14 +661,12 @@ addNewPrivateOperands(OpT &accOp, mlir::SmallVector &privateOperands, accOp.getFirstprivateOperandsMutable().append(newFirstprivateOperands); // Update the privatizationRecipes attributes to hold all of the new recipes. - if (!completePrivateRecipesSyms.empty()) { + if (!completePrivateRecipesSyms.empty()) accOp.setPrivatizationRecipesAttr( mlir::ArrayAttr::get(accOp.getContext(), completePrivateRecipesSyms)); - } - if (!completeFirstprivateRecipesSyms.empty()) { + if (!completeFirstprivateRecipesSyms.empty()) accOp.setFirstprivatizationRecipesAttr(mlir::ArrayAttr::get( accOp.getContext(), completeFirstprivateRecipesSyms)); - } } static mlir::Operation *findDataExitOp(mlir::Operation *dataEntryOp) { @@ -803,9 +788,8 @@ void ACCImplicitData::generateImplicitDataOps( // Implicit data attributes are only applied if "[t]here is no default(none) // clause visible at the compute construct." if (defaultClause.has_value() && - defaultClause.value() == acc::ClauseDefaultValue::None) { + defaultClause.value() == acc::ClauseDefaultValue::None) return; - } assert(!defaultClause.has_value() || defaultClause.value() == acc::ClauseDefaultValue::Present); @@ -856,17 +840,15 @@ void ACCImplicitData::generateImplicitDataOps( // 5) Generate private recipes which are required for properly attaching // private operands. if constexpr (!std::is_same_v && - !std::is_same_v) { + !std::is_same_v) generateRecipes(module, builder, computeConstructOp, newPrivateOperands, newPrivateRecipeSyms); - } // 6) Figure out insertion order for the new data clause operands. llvm::SmallVector sortedDataClauseOperands( computeConstructOp.getDataClauseOperands()); - for (auto newClause : newDataClauseOperands) { + for (auto newClause : newDataClauseOperands) insertInSortedOrder(sortedDataClauseOperands, newClause.getDefiningOp()); - } // 7) Generate the data exit operations. generateDataExitOperations(builder, computeConstructOp, newDataClauseOperands, @@ -876,10 +858,10 @@ void ACCImplicitData::generateImplicitDataOps( assert(newPrivateOperands.size() == newPrivateRecipeSyms.size() && "sizes must match"); if constexpr (!std::is_same_v && - !std::is_same_v) { + !std::is_same_v) addNewPrivateOperands(computeConstructOp, newPrivateOperands, newPrivateRecipeSyms); - } + computeConstructOp.getDataClauseOperandsMutable().assign( sortedDataClauseOperands); } From 60a962d64f2494a128ba2c667cc19545ae34862c Mon Sep 17 00:00:00 2001 From: Razvan Lupusoru Date: Tue, 4 Nov 2025 17:15:21 -0800 Subject: [PATCH 3/8] Use FunctionOpInterface instead of FuncOp --- mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp b/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp index 3547aa8f51e1c..67c857398652f 100644 --- a/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp +++ b/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp @@ -200,7 +200,6 @@ #include "mlir/Dialect/OpenACC/Transforms/Passes.h" #include "mlir/Analysis/AliasAnalysis.h" -#include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/OpenACC/Analysis/OpenACCSupport.h" #include "mlir/Dialect/OpenACC/OpenACC.h" #include "mlir/Dialect/OpenACC/OpenACCUtils.h" @@ -209,6 +208,7 @@ #include "mlir/IR/Dominance.h" #include "mlir/IR/Operation.h" #include "mlir/IR/Value.h" +#include "mlir/Interfaces/FunctionInterfaces.h" #include "mlir/Interfaces/ViewLikeInterface.h" #include "mlir/Transforms/RegionUtils.h" #include "llvm/ADT/STLExtras.h" @@ -330,7 +330,7 @@ ACCImplicitData::getDominatingDataClauses(Operation *computeConstructOp) { } // Find the enclosing function/subroutine - Operation *funcOp = computeConstructOp->getParentOfType(); + auto funcOp = computeConstructOp->getParentOfType(); if (!funcOp) return dominatingDataClauses.takeVector(); From 77f40d0900fb111b6b0bce76747293058f9c4ad1 Mon Sep 17 00:00:00 2001 From: Razvan Lupusoru Date: Tue, 4 Nov 2025 17:18:10 -0800 Subject: [PATCH 4/8] More braces fix --- .../OpenACC/Transforms/ACCImplicitData.cpp | 22 +++++++++---------- 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp b/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp index 67c857398652f..39540cad10637 100644 --- a/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp +++ b/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp @@ -515,26 +515,24 @@ Operation *ACCImplicitData::generateDataClauseOpForCandidate( op = getOriginalDataClauseOpForAlias(var, builder, computeConstructOp, dominatingDataClauses); if (op) { - if (isa(op)) { + if (isa(op)) return acc::NoCreateOp::create(builder, loc, var, /*structured=*/true, /*implicit=*/true, accSupport.getVariableName(var), acc::getBounds(op)); - } - if (isa(op)) { + + if (isa(op)) return acc::DevicePtrOp::create(builder, loc, var, /*structured=*/true, /*implicit=*/true, accSupport.getVariableName(var), acc::getBounds(op)); - } - { - // The original data clause op is a PresentOp, CopyinOp, or CreateOp, - // hence guaranteed to be present. - return acc::PresentOp::create(builder, loc, var, - /*structured=*/true, /*implicit=*/true, - accSupport.getVariableName(var), - acc::getBounds(op)); - } + + // The original data clause op is a PresentOp, CopyinOp, or CreateOp, + // hence guaranteed to be present. + return acc::PresentOp::create(builder, loc, var, + /*structured=*/true, /*implicit=*/true, + accSupport.getVariableName(var), + acc::getBounds(op)); } else if (isScalar) { if (enableImplicitReductionCopy && acc::isOnlyUsedByReductionClauses(var, From 3685f40667702e68980971704c73331429f7e61a Mon Sep 17 00:00:00 2001 From: Razvan Lupusoru Date: Wed, 5 Nov 2025 17:09:09 -0800 Subject: [PATCH 5/8] Remove uses of mlir:: --- .../OpenACC/Transforms/ACCImplicitData.cpp | 103 +++++++++--------- 1 file changed, 50 insertions(+), 53 deletions(-) diff --git a/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp b/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp index 39540cad10637..f9cc5b3d647ed 100644 --- a/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp +++ b/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp @@ -230,11 +230,9 @@ using namespace mlir; namespace { -class ACCImplicitData - : public mlir::acc::impl::ACCImplicitDataBase { +class ACCImplicitData : public acc::impl::ACCImplicitDataBase { public: - using mlir::acc::impl::ACCImplicitDataBase< - ACCImplicitData>::ACCImplicitDataBase; + using acc::impl::ACCImplicitDataBase::ACCImplicitDataBase; void runOnOperation() override; @@ -242,22 +240,21 @@ class ACCImplicitData /// Collects all data clauses that dominate the compute construct. /// Needed to determine if a variable is already covered by an existing data /// clause. - SmallVector - getDominatingDataClauses(Operation *computeConstructOp); + SmallVector getDominatingDataClauses(Operation *computeConstructOp); /// Looks through the `dominatingDataClauses` to find the original data clause /// op for an alias. Returns nullptr if no original data clause op is found. template Operation *getOriginalDataClauseOpForAlias( Value var, OpBuilder &builder, OpT computeConstructOp, - const SmallVector &dominatingDataClauses); + const SmallVector &dominatingDataClauses); /// Generates the appropriate `acc.copyin`, `acc.present`,`acc.firstprivate`, /// etc. data clause op for a candidate variable. template Operation *generateDataClauseOpForCandidate( Value var, ModuleOp &module, OpBuilder &builder, OpT computeConstructOp, - const SmallVector &dominatingDataClauses, + const SmallVector &dominatingDataClauses, const std::optional &defaultClause); /// Generates the implicit data ops for a compute construct. @@ -267,15 +264,14 @@ class ACCImplicitData std::optional &defaultClause); /// Generates a private recipe for a variable. - acc::PrivateRecipeOp generatePrivateRecipe(ModuleOp &module, mlir::Value var, - mlir::Location loc, - OpBuilder &builder, + acc::PrivateRecipeOp generatePrivateRecipe(ModuleOp &module, Value var, + Location loc, OpBuilder &builder, acc::OpenACCSupport &accSupport); /// Generates a firstprivate recipe for a variable. acc::FirstprivateRecipeOp - generateFirstprivateRecipe(ModuleOp &module, mlir::Value var, - mlir::Location loc, OpBuilder &builder, + generateFirstprivateRecipe(ModuleOp &module, Value var, Location loc, + OpBuilder &builder, acc::OpenACCSupport &accSupport); /// Generates recipes for a list of variables. @@ -305,9 +301,9 @@ static bool isCandidateForImplicitData(Value val, Region &accRegion) { return true; } -SmallVector +SmallVector ACCImplicitData::getDominatingDataClauses(Operation *computeConstructOp) { - llvm::SmallSetVector dominatingDataClauses; + llvm::SmallSetVector dominatingDataClauses; llvm::TypeSwitch(computeConstructOp) .Case([&](auto op) { @@ -339,7 +335,7 @@ ACCImplicitData::getDominatingDataClauses(Operation *computeConstructOp) { // clauses to the list. auto &domInfo = this->getAnalysis(); auto &postDomInfo = this->getAnalysis(); - funcOp->walk([&](mlir::acc::DeclareEnterOp declareEnterOp) { + funcOp->walk([&](acc::DeclareEnterOp declareEnterOp) { if (domInfo.dominates(declareEnterOp.getOperation(), computeConstructOp)) { // Collect all `acc.declare_exit` ops for this token. SmallVector exits; @@ -364,7 +360,7 @@ ACCImplicitData::getDominatingDataClauses(Operation *computeConstructOp) { template Operation *ACCImplicitData::getOriginalDataClauseOpForAlias( Value var, OpBuilder &builder, OpT computeConstructOp, - const SmallVector &dominatingDataClauses) { + const SmallVector &dominatingDataClauses) { auto &aliasAnalysis = this->getAnalysis(); for (auto dataClause : dominatingDataClauses) { if (auto *dataClauseOp = dataClause.getDefiningOp()) { @@ -379,22 +375,22 @@ Operation *ACCImplicitData::getOriginalDataClauseOpForAlias( } // Generates bounds for variables that have unknown dimensions -static void fillInBoundsForUnknownDimensions(mlir::Operation *dataClauseOp, +static void fillInBoundsForUnknownDimensions(Operation *dataClauseOp, OpBuilder &builder) { - if (!mlir::acc::getBounds(dataClauseOp).empty()) + if (!acc::getBounds(dataClauseOp).empty()) // If bounds are already present, do not overwrite them. return; // For types that have unknown dimensions, attempt to generate bounds by // relying on MappableType being able to extract it from the IR. - auto var = mlir::acc::getVar(dataClauseOp); + auto var = acc::getVar(dataClauseOp); auto type = var.getType(); if (auto mappableTy = dyn_cast(type)) { if (mappableTy.hasUnknownDimensions()) { - TypeSwitch(dataClauseOp) + TypeSwitch(dataClauseOp) .Case([&](auto dataClauseOp) { - if (std::is_same_v) + if (std::is_same_v) return; OpBuilder::InsertionGuard guard(builder); builder.setInsertionPoint(dataClauseOp); @@ -407,8 +403,8 @@ static void fillInBoundsForUnknownDimensions(mlir::Operation *dataClauseOp, } acc::PrivateRecipeOp -ACCImplicitData::generatePrivateRecipe(ModuleOp &module, mlir::Value var, - mlir::Location loc, OpBuilder &builder, +ACCImplicitData::generatePrivateRecipe(ModuleOp &module, Value var, + Location loc, OpBuilder &builder, acc::OpenACCSupport &accSupport) { auto type = var.getType(); std::string recipeName = @@ -423,16 +419,17 @@ ACCImplicitData::generatePrivateRecipe(ModuleOp &module, mlir::Value var, OpBuilder::InsertionGuard guard(builder); builder.setInsertionPointToStart(module.getBody()); - auto recipe = mlir::acc::PrivateRecipeOp::createAndPopulate(builder, loc, - recipeName, type); + auto recipe = + acc::PrivateRecipeOp::createAndPopulate(builder, loc, recipeName, type); if (!recipe.has_value()) return accSupport.emitNYI(loc, "implicit private"), nullptr; return recipe.value(); } -acc::FirstprivateRecipeOp ACCImplicitData::generateFirstprivateRecipe( - ModuleOp &module, mlir::Value var, mlir::Location loc, OpBuilder &builder, - acc::OpenACCSupport &accSupport) { +acc::FirstprivateRecipeOp +ACCImplicitData::generateFirstprivateRecipe(ModuleOp &module, Value var, + Location loc, OpBuilder &builder, + acc::OpenACCSupport &accSupport) { auto type = var.getType(); std::string recipeName = accSupport.getRecipeName(acc::RecipeKind::firstprivate_recipe, type, var); @@ -447,8 +444,8 @@ acc::FirstprivateRecipeOp ACCImplicitData::generateFirstprivateRecipe( OpBuilder::InsertionGuard guard(builder); builder.setInsertionPointToStart(module.getBody()); - auto recipe = mlir::acc::FirstprivateRecipeOp::createAndPopulate( - builder, loc, recipeName, type); + auto recipe = acc::FirstprivateRecipeOp::createAndPopulate(builder, loc, + recipeName, type); if (!recipe.has_value()) return accSupport.emitNYI(loc, "implicit firstprivate"), nullptr; return recipe.value(); @@ -465,14 +462,14 @@ void ACCImplicitData::generateRecipes(ModuleOp &module, OpBuilder &builder, auto recipe = generatePrivateRecipe( module, acc::getVar(var.getDefiningOp()), loc, builder, accSupport); if (recipe) - newRecipeSyms.push_back(mlir::SymbolRefAttr::get( - module->getContext(), recipe.getSymName().str())); + newRecipeSyms.push_back(SymbolRefAttr::get(module->getContext(), + recipe.getSymName().str())); } else if (isa(var.getDefiningOp())) { auto recipe = generateFirstprivateRecipe( module, acc::getVar(var.getDefiningOp()), loc, builder, accSupport); if (recipe) - newRecipeSyms.push_back(mlir::SymbolRefAttr::get( - module->getContext(), recipe.getSymName().str())); + newRecipeSyms.push_back(SymbolRefAttr::get(module->getContext(), + recipe.getSymName().str())); } else { accSupport.emitNYI(var.getLoc(), "implicit reduction"); } @@ -491,7 +488,7 @@ void ACCImplicitData::generateRecipes(ModuleOp &module, OpBuilder &builder, template Operation *ACCImplicitData::generateDataClauseOpForCandidate( Value var, ModuleOp &module, OpBuilder &builder, OpT computeConstructOp, - const SmallVector &dominatingDataClauses, + const SmallVector &dominatingDataClauses, const std::optional &defaultClause) { auto &accSupport = this->getAnalysis(); acc::VariableTypeCategory typeCategory = @@ -509,7 +506,7 @@ Operation *ACCImplicitData::generateDataClauseOpForCandidate( acc::bitEnumContainsAny(typeCategory, acc::VariableTypeCategory::scalar); bool isAnyAggregate = acc::bitEnumContainsAny( typeCategory, acc::VariableTypeCategory::aggregate); - mlir::Location loc = computeConstructOp->getLoc(); + Location loc = computeConstructOp->getLoc(); Operation *op = nullptr; op = getOriginalDataClauseOpForAlias(var, builder, computeConstructOp, @@ -574,7 +571,7 @@ Operation *ACCImplicitData::generateDataClauseOpForCandidate( /*structured=*/true, /*implicit=*/true, accSupport.getVariableName(var)); } else { - SmallVector bounds; + SmallVector bounds; auto copyinOp = acc::CopyinOp::create(builder, loc, var, /*structured=*/true, /*implicit=*/true, @@ -619,17 +616,17 @@ static void legalizeValuesInRegion(Region &accRegion, // operation in a valid way (ensures that the index in the privatizationRecipes // array matches the position of the private operand). template -static void -addNewPrivateOperands(OpT &accOp, mlir::SmallVector &privateOperands, - mlir::SmallVector &privateRecipeSyms) { +static void addNewPrivateOperands(OpT &accOp, + SmallVector &privateOperands, + SmallVector &privateRecipeSyms) { assert(privateOperands.size() == privateRecipeSyms.size()); if (privateOperands.empty()) return; - mlir::SmallVector completePrivateRecipesSyms; - mlir::SmallVector completeFirstprivateRecipesSyms; - mlir::SmallVector newPrivateOperands; - mlir::SmallVector newFirstprivateOperands; + SmallVector completePrivateRecipesSyms; + SmallVector completeFirstprivateRecipesSyms; + SmallVector newPrivateOperands; + SmallVector newFirstprivateOperands; // Collect all of the existing recipes since they are held in an attribute. // To add to it, we need to create a brand new one. @@ -661,13 +658,13 @@ addNewPrivateOperands(OpT &accOp, mlir::SmallVector &privateOperands, // Update the privatizationRecipes attributes to hold all of the new recipes. if (!completePrivateRecipesSyms.empty()) accOp.setPrivatizationRecipesAttr( - mlir::ArrayAttr::get(accOp.getContext(), completePrivateRecipesSyms)); + ArrayAttr::get(accOp.getContext(), completePrivateRecipesSyms)); if (!completeFirstprivateRecipesSyms.empty()) - accOp.setFirstprivatizationRecipesAttr(mlir::ArrayAttr::get( - accOp.getContext(), completeFirstprivateRecipesSyms)); + accOp.setFirstprivatizationRecipesAttr( + ArrayAttr::get(accOp.getContext(), completeFirstprivateRecipesSyms)); } -static mlir::Operation *findDataExitOp(mlir::Operation *dataEntryOp) { +static Operation *findDataExitOp(Operation *dataEntryOp) { auto res = acc::getAccVar(dataEntryOp); for (auto *user : res.getUsers()) if (isa(user)) @@ -685,8 +682,8 @@ static mlir::Operation *findDataExitOp(mlir::Operation *dataEntryOp) { // (after region) static void generateDataExitOperations(OpBuilder &builder, Operation *accOp, - mlir::SmallVector &newDataClauseOperands, - mlir::SmallVector &sortedDataClauseOperands) { + SmallVector &newDataClauseOperands, + SmallVector &sortedDataClauseOperands) { builder.setInsertionPointAfter(accOp); Value lastDataClause = nullptr; for (auto dataEntry : llvm::reverse(sortedDataClauseOperands)) { @@ -733,13 +730,13 @@ static llvm::SmallVector getBaseRefsChain(Value val) { while (true) { Value prevVal = val; - val = mlir::acc::getBaseEntity(val); + val = acc::getBaseEntity(val); if (val != baseRefs.front()) baseRefs.insert(baseRefs.begin(), val); // If this is a view-like operation, it is effectively another // view of the same entity so we should add it to the chain also. - if (auto viewLikeOp = val.getDefiningOp()) { + if (auto viewLikeOp = val.getDefiningOp()) { val = viewLikeOp.getViewSource(); baseRefs.insert(baseRefs.begin(), val); } From da954818bc0da36b39e081d203ca3d6088cdfec7 Mon Sep 17 00:00:00 2001 From: Razvan Lupusoru Date: Wed, 5 Nov 2025 17:10:44 -0800 Subject: [PATCH 6/8] Use SmallVector with llvm:: --- mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp b/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp index f9cc5b3d647ed..40ee7dbadcdcf 100644 --- a/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp +++ b/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp @@ -724,8 +724,8 @@ generateDataExitOperations(OpBuilder &builder, Operation *accOp, /// s.f1.f2.f3, this will return . /// Any intermediate casts/view-like operations are included in the /// chain as well. -static llvm::SmallVector getBaseRefsChain(Value val) { - llvm::SmallVector baseRefs; +static SmallVector getBaseRefsChain(Value val) { + SmallVector baseRefs; baseRefs.push_back(val); while (true) { Value prevVal = val; @@ -750,7 +750,7 @@ static llvm::SmallVector getBaseRefsChain(Value val) { } static void -insertInSortedOrder(llvm::SmallVector &sortedDataClauseOperands, +insertInSortedOrder(SmallVector &sortedDataClauseOperands, Operation *newClause) { auto *insertPos = std::find_if(sortedDataClauseOperands.begin(), @@ -840,7 +840,7 @@ void ACCImplicitData::generateImplicitDataOps( newPrivateRecipeSyms); // 6) Figure out insertion order for the new data clause operands. - llvm::SmallVector sortedDataClauseOperands( + SmallVector sortedDataClauseOperands( computeConstructOp.getDataClauseOperands()); for (auto newClause : newDataClauseOperands) insertInSortedOrder(sortedDataClauseOperands, newClause.getDefiningOp()); From f6960a9056c5fbae4a4220205fe3e007b72890e1 Mon Sep 17 00:00:00 2001 From: Razvan Lupusoru Date: Wed, 5 Nov 2025 17:18:51 -0800 Subject: [PATCH 7/8] Add const to SmallVector arguments --- .../OpenACC/Transforms/ACCImplicitData.cpp | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp b/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp index 40ee7dbadcdcf..4d1f69b0ee356 100644 --- a/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp +++ b/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp @@ -602,9 +602,10 @@ Operation *ACCImplicitData::generateDataClauseOpForCandidate( // acc.kernels { // use %dev // } -static void legalizeValuesInRegion(Region &accRegion, - SmallVector &newPrivateOperands, - SmallVector &newDataClauseOperands) { +static void +legalizeValuesInRegion(Region &accRegion, + const SmallVector &newPrivateOperands, + const SmallVector &newDataClauseOperands) { for (Value dataClause : llvm::concat(newDataClauseOperands, newPrivateOperands)) { Value var = acc::getVar(dataClause.getDefiningOp()); @@ -616,9 +617,9 @@ static void legalizeValuesInRegion(Region &accRegion, // operation in a valid way (ensures that the index in the privatizationRecipes // array matches the position of the private operand). template -static void addNewPrivateOperands(OpT &accOp, - SmallVector &privateOperands, - SmallVector &privateRecipeSyms) { +static void +addNewPrivateOperands(OpT &accOp, const SmallVector &privateOperands, + const SmallVector &privateRecipeSyms) { assert(privateOperands.size() == privateRecipeSyms.size()); if (privateOperands.empty()) return; @@ -682,8 +683,8 @@ static Operation *findDataExitOp(Operation *dataEntryOp) { // (after region) static void generateDataExitOperations(OpBuilder &builder, Operation *accOp, - SmallVector &newDataClauseOperands, - SmallVector &sortedDataClauseOperands) { + const SmallVector &newDataClauseOperands, + const SmallVector &sortedDataClauseOperands) { builder.setInsertionPointAfter(accOp); Value lastDataClause = nullptr; for (auto dataEntry : llvm::reverse(sortedDataClauseOperands)) { @@ -749,9 +750,8 @@ static SmallVector getBaseRefsChain(Value val) { return baseRefs; } -static void -insertInSortedOrder(SmallVector &sortedDataClauseOperands, - Operation *newClause) { +static void insertInSortedOrder(SmallVector &sortedDataClauseOperands, + Operation *newClause) { auto *insertPos = std::find_if(sortedDataClauseOperands.begin(), sortedDataClauseOperands.end(), [&](Value dataClauseVal) { From e766fc96f84e5ba673f7592bef992c671cc16522 Mon Sep 17 00:00:00 2001 From: Razvan Lupusoru Date: Wed, 5 Nov 2025 17:30:03 -0800 Subject: [PATCH 8/8] Remove const from one spot --- mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp b/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp index 4d1f69b0ee356..a99e4846eea20 100644 --- a/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp +++ b/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp @@ -602,10 +602,9 @@ Operation *ACCImplicitData::generateDataClauseOpForCandidate( // acc.kernels { // use %dev // } -static void -legalizeValuesInRegion(Region &accRegion, - const SmallVector &newPrivateOperands, - const SmallVector &newDataClauseOperands) { +static void legalizeValuesInRegion(Region &accRegion, + SmallVector &newPrivateOperands, + SmallVector &newDataClauseOperands) { for (Value dataClause : llvm::concat(newDataClauseOperands, newPrivateOperands)) { Value var = acc::getVar(dataClause.getDefiningOp());