From cef10a3077412001cd05fc4515a6d94b9c958fa1 Mon Sep 17 00:00:00 2001 From: Toby Harradine Date: Wed, 22 Oct 2025 14:45:25 +1100 Subject: [PATCH] perf: improve analysis performance by 95% for `py_binary` and `py_test` rules --- python/private/BUILD.bazel | 14 ++ python/private/py_executable.bzl | 79 ++----- python/private/py_executable_zip_gen.cc | 291 ++++++++++++++++++++++++ 3 files changed, 327 insertions(+), 57 deletions(-) create mode 100644 python/private/py_executable_zip_gen.cc diff --git a/python/private/BUILD.bazel b/python/private/BUILD.bazel index 7b8c82de8f..f74e9f5b3e 100644 --- a/python/private/BUILD.bazel +++ b/python/private/BUILD.bazel @@ -14,6 +14,7 @@ load("@bazel_skylib//:bzl_library.bzl", "bzl_library") load("@bazel_skylib//rules:common_settings.bzl", "bool_setting") +load("@rules_cc//cc:defs.bzl", "cc_binary") load("//python:py_binary.bzl", "py_binary") load("//python:py_library.bzl", "py_library") load(":print_toolchain_checksums.bzl", "print_toolchains_checksums") @@ -829,6 +830,19 @@ py_binary( ], ) +# Used for py_executable rule +# C++ wrapper for zipper to process Python zip manifests +cc_binary( + name = "py_executable_zip_gen", + srcs = ["py_executable_zip_gen.cc"], + copts = select({ + "@rules_cc//cc/compiler:msvc-cl": ["/std:c++17"], + "//conditions:default": ["-std=c++17"], + }), + data = ["@bazel_tools//tools/zip:zipper"], + deps = ["@bazel_tools//tools/cpp/runfiles"], +) + py_binary( name = "py_wheel_dist", srcs = ["py_wheel_dist.py"], diff --git a/python/private/py_executable.bzl b/python/private/py_executable.bzl index 1a5ad4c3c6..f951ea78da 100644 --- a/python/private/py_executable.bzl +++ b/python/private/py_executable.bzl @@ -67,8 +67,6 @@ load(":transition_labels.bzl", "TRANSITION_LABELS") load(":venv_runfiles.bzl", "create_venv_app_files") _py_builtins = py_internal -_EXTERNAL_PATH_PREFIX = "external" -_ZIP_RUNFILES_DIRECTORY_NAME = "runfiles" # Non-Google-specific attributes for executables # These attributes are for rules that accept Python sources. @@ -236,7 +234,7 @@ accepting arbitrary Python versions. "_zipper": lambda: attrb.Label( cfg = "exec", executable = True, - default = "@bazel_tools//tools/zip:zipper", + default = ":py_executable_zip_gen", ), }, ) @@ -380,9 +378,8 @@ def _create_executable( _create_zip_file( ctx, output = zip_file, - original_nonzip_executable = executable, zip_main = zip_main, - runfiles = runfiles_details.default_runfiles.merge(extra_runfiles), + runfiles = runfiles_details.runfiles_without_exe.merge(extra_runfiles), ) extra_files_to_build = [] @@ -803,35 +800,16 @@ def _create_windows_exe_launcher( use_default_shell_env = True, ) -def _create_zip_file(ctx, *, output, original_nonzip_executable, zip_main, runfiles): +def _create_zip_file(ctx, *, output, zip_main, runfiles): """Create a Python zipapp (zip with __main__.py entry point).""" - workspace_name = ctx.workspace_name legacy_external_runfiles = _py_builtins.get_legacy_external_runfiles(ctx) - manifest = ctx.actions.args() - manifest.use_param_file("@%s", use_always = True) - manifest.set_param_file_format("multiline") - - manifest.add("__main__.py={}".format(zip_main.path)) - manifest.add("__init__.py=") - manifest.add( - "{}=".format( - _get_zip_runfiles_path("__init__.py", workspace_name, legacy_external_runfiles), - ), - ) - for path in runfiles.empty_filenames.to_list(): - manifest.add("{}=".format(_get_zip_runfiles_path(path, workspace_name, legacy_external_runfiles))) - - def map_zip_runfiles(file): - if file != original_nonzip_executable and file != output: - return "{}={}".format( - _get_zip_runfiles_path(file.short_path, workspace_name, legacy_external_runfiles), - file.path, - ) - else: - return None - - manifest.add_all(runfiles.files, map_each = map_zip_runfiles, allow_closure = True) + args = ctx.actions.args() + args.add("--output", output) + args.add("--workspace-name", ctx.workspace_name) + args.add("--main-file", zip_main) + if legacy_external_runfiles: + args.add("--legacy-external-runfiles") inputs = [zip_main] if _py_builtins.is_bzlmod_enabled(ctx): @@ -844,43 +822,30 @@ def _create_zip_file(ctx, *, output, original_nonzip_executable, zip_main, runfi runfiles = runfiles, output = zip_repo_mapping_manifest, ) - manifest.add("{}/_repo_mapping={}".format( - _ZIP_RUNFILES_DIRECTORY_NAME, - zip_repo_mapping_manifest.path, - )) + args.add("--repo-mapping-manifest", zip_repo_mapping_manifest) inputs.append(zip_repo_mapping_manifest) - for artifact in runfiles.files.to_list(): - # Don't include the original executable because it isn't used by the - # zip file, so no need to build it for the action. - # Don't include the zipfile itself because it's an output. - if artifact != original_nonzip_executable and artifact != output: - inputs.append(artifact) - - zip_cli_args = ctx.actions.args() - zip_cli_args.add("cC") - zip_cli_args.add(output) + manifest = ctx.actions.args() + manifest.use_param_file("%s", use_always = True) + manifest.set_param_file_format("multiline") + manifest.add_all(runfiles.empty_filenames, map_each = _get_zip_empty_path_arg) + manifest.add_all(runfiles.files, map_each = _get_zip_path_arg) ctx.actions.run( executable = ctx.executable._zipper, - arguments = [zip_cli_args, manifest], - inputs = depset(inputs), + arguments = [args, manifest], + inputs = depset(inputs, transitive = [runfiles.files]), outputs = [output], use_default_shell_env = True, mnemonic = "PythonZipper", progress_message = "Building Python zip: %{label}", ) -def _get_zip_runfiles_path(path, workspace_name, legacy_external_runfiles): - if legacy_external_runfiles and path.startswith(_EXTERNAL_PATH_PREFIX): - zip_runfiles_path = paths.relativize(path, _EXTERNAL_PATH_PREFIX) - else: - # NOTE: External runfiles (artifacts in other repos) will have a leading - # path component of "../" so that they refer outside the main workspace - # directory and into the runfiles root. By normalizing, we simplify e.g. - # "workspace/../foo/bar" to simply "foo/bar". - zip_runfiles_path = paths.normalize("{}/{}".format(workspace_name, path)) - return "{}/{}".format(_ZIP_RUNFILES_DIRECTORY_NAME, zip_runfiles_path) +def _get_zip_empty_path_arg(file): + return "{}=".format(file) + +def _get_zip_path_arg(file): + return "{}={}".format(file.short_path, file.path) def _create_executable_zip_file( ctx, diff --git a/python/private/py_executable_zip_gen.cc b/python/private/py_executable_zip_gen.cc new file mode 100644 index 0000000000..8c6bcec2c4 --- /dev/null +++ b/python/private/py_executable_zip_gen.cc @@ -0,0 +1,291 @@ +// Copyright 2025 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Wrapper for zipper that processes Python zip manifest generation. +// +// This C++ script wraps `@bazel_tools//tools/zip/zipper` to produce the +// input manifest for the zipper tool during build time, rather than at +// analysis time. +// +// Usage: py_executable_zip_gen [flags...] +// Flags are passed as regular command-line arguments +// Input files manifest contains the list of files to include (short_path=disk_path) + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "tools/cpp/runfiles/runfiles.h" + +using bazel::tools::cpp::runfiles::Runfiles; +namespace fs = std::filesystem; + +// Path manipulation utilities +namespace path { + +// Normalize a path (remove "../" and "." components) +std::string normalize(const std::string &p) { + return fs::path(p).lexically_normal().generic_string(); +} + +// Remove prefix from path +std::string relativize(const std::string &path, const std::string &prefix) { + if (path.size() >= prefix.size() && path.compare(0, prefix.size(), prefix) == 0) { + size_t start = prefix.size(); + if (start < path.size() && path[start] == '/') { + start++; + } + return path.substr(start); + } + return path; +} + +bool starts_with(const std::string &str, const std::string &prefix) { + return str.size() >= prefix.size() && str.compare(0, prefix.size(), prefix) == 0; +} + +} // namespace path + +struct FileEntry { + std::string short_path; + std::string disk_path; + + // Parse a file entry in "short_path=disk_path" format + static FileEntry parse(const std::string &line) { + FileEntry entry; + size_t eq = line.find('='); + if (eq == std::string::npos) { + std::cerr << "ERROR: Invalid file entry (no '='): " << line << std::endl; + std::exit(1); + } + + entry.short_path = line.substr(0, eq); + entry.disk_path = line.substr(eq + 1); + + return entry; + } +}; + +// Get path inside the zip where a file should go +std::string get_zip_runfiles_path(const std::string &path, const std::string &workspace_name, + bool legacy_external_runfiles) { + std::string zip_runfiles_path; + + if (legacy_external_runfiles && path::starts_with(path, "external/")) { + zip_runfiles_path = path::relativize(path, "external"); + } else { + // Normalize workspace_name/../external/path to external/path + std::string combined = workspace_name + "/" + path; + zip_runfiles_path = path::normalize(combined); + } + + return "runfiles/" + zip_runfiles_path; +} + +// Check if an executable exists in runfiles, accounting for Windows file extensions. +// Returns the path if found, otherwise an empty string. +std::string find_executable_in_runfiles(Runfiles *runfiles, const std::string &runfile_path) { + std::string path = runfiles->Rlocation(runfile_path); + if (path.empty()) { + return ""; + } + +#ifndef _WIN32 + if (std::filesystem::exists(path)) { + return path; + } +#else + // On Windows, try common executable extensions + std::string path_with_ext; + const std::vector extensions = {".exe", ".bat", ".cmd"}; + for (const auto &ext : extensions) { + path_with_ext = path + ext; + if (std::filesystem::exists(path_with_ext)) { + return path_with_ext; + } + } +#endif + + return ""; // Not found +} + +struct Config { + std::string output; + std::string workspace_name; + std::string main_file; + std::string repo_mapping_manifest; + bool legacy_external_runfiles = false; + std::string input_files_manifest; +}; + +Config parse_args(int argc, char *argv[]) { + if (argc < 2) { + std::cerr << "Usage: " << argv[0] << " [flags...] " << std::endl; + std::exit(1); + } + + Config config; + + for (int i = 1; i < argc; i++) { + std::string arg = argv[i]; + + if (arg == "--output") { + if (i + 1 >= argc) { + std::cerr << "ERROR: --output requires a value" << std::endl; + std::exit(1); + } + config.output = argv[++i]; + } else if (arg == "--workspace-name") { + if (i + 1 >= argc) { + std::cerr << "ERROR: --workspace-name requires a value" << std::endl; + std::exit(1); + } + config.workspace_name = argv[++i]; + } else if (arg == "--main-file") { + if (i + 1 >= argc) { + std::cerr << "ERROR: --main-file requires a value" << std::endl; + std::exit(1); + } + config.main_file = argv[++i]; + } else if (arg == "--repo-mapping-manifest") { + if (i + 1 >= argc) { + std::cerr << "ERROR: --repo-mapping-manifest requires a value" << std::endl; + std::exit(1); + } + config.repo_mapping_manifest = argv[++i]; + } else if (arg == "--legacy-external-runfiles") { + config.legacy_external_runfiles = true; + } else { + config.input_files_manifest = arg; + } + } + + return config; +} + +std::vector read_input_manifest(const std::string &manifest_path) { + std::vector files; + std::ifstream in(manifest_path); + + if (!in) { + std::cerr << "ERROR: Cannot open input files manifest: " << manifest_path << std::endl; + std::exit(1); + } + + std::string line; + while (std::getline(in, line)) { + if (!line.empty()) { + files.push_back(FileEntry::parse(line)); + } + } + + return files; +} + +void validate_config(const Config &config) { + if (config.input_files_manifest.empty()) { + std::cerr << "ERROR: No input files manifest specified" << std::endl; + std::exit(1); + } + if (config.output.empty()) { + std::cerr << "ERROR: --output is required" << std::endl; + std::exit(1); + } + if (config.workspace_name.empty()) { + std::cerr << "ERROR: --workspace-name is required" << std::endl; + std::exit(1); + } + if (config.main_file.empty()) { + std::cerr << "ERROR: --main-file is required" << std::endl; + std::exit(1); + } +} + +void write_zip_manifest(const Config &config, const std::vector &files, + const std::string &manifest_path) { + // Open in binary mode to avoid LF -> CRLF conversion on Windows. + std::ofstream manifest(manifest_path, std::ios::binary); + if (!manifest) { + std::cerr << "ERROR: Cannot create manifest file: " << manifest_path << std::endl; + std::exit(1); + } + + manifest << "__main__.py=" << config.main_file << "\n"; + + manifest << "__init__.py=\n"; + manifest << get_zip_runfiles_path("__init__.py", config.workspace_name, + config.legacy_external_runfiles) + << "=\n"; + + for (const auto &file : files) { + std::string zip_path = get_zip_runfiles_path(file.short_path, config.workspace_name, + config.legacy_external_runfiles); + manifest << zip_path << "=" << file.disk_path << "\n"; + } + + if (!config.repo_mapping_manifest.empty()) { + manifest << "runfiles/_repo_mapping=" << config.repo_mapping_manifest << "\n"; + } +} + +void run_zipper(const std::string &executable, const std::string &output, + const std::string &manifest_path) { + std::string error; + std::unique_ptr runfiles(Runfiles::Create(executable, &error)); + + if (runfiles == nullptr) { + std::cerr << "ERROR: Failed to initialize runfiles: " << error << std::endl; + std::exit(1); + } + + std::string zipper_path = + find_executable_in_runfiles(runfiles.get(), "bazel_tools/tools/zip/zipper/zipper"); + if (zipper_path.empty()) { + // @bazel_tools/tools/zip:zipper is an alias for @bazel_tools/third_party/ijar:zipper. + // On some systems, this means the binary is located in bazel_tools/third_party/ijar/zipper + // instead. + zipper_path = + find_executable_in_runfiles(runfiles.get(), "bazel_tools/third_party/ijar/zipper"); + if (zipper_path.empty()) { + std::cerr << "ERROR: Could not locate zipper in runfiles" << std::endl; + std::exit(1); + } + } + + std::string cmd = zipper_path + " cC " + output + " @" + manifest_path; + int result = std::system(cmd.c_str()); + + if (result != 0) { + std::cerr << "ERROR: zipper failed with exit code " << result << std::endl; + std::exit(1); + } +} + +int main(int argc, char *argv[]) { + Config config = parse_args(argc, argv); + validate_config(config); + + std::vector files = read_input_manifest(config.input_files_manifest); + + std::string zip_manifest_path = config.output + ".manifest.txt"; + write_zip_manifest(config, files, zip_manifest_path); + run_zipper(argv[0], config.output, zip_manifest_path); + + return 0; +}