Skip to content

Instantly share code, notes, and snippets.

@rrbutani
Last active December 1, 2022 02:07
Show Gist options
  • Save rrbutani/f427d642dd995c1181918570b0caa156 to your computer and use it in GitHub Desktop.
Save rrbutani/f427d642dd995c1181918570b0caa156 to your computer and use it in GitHub Desktop.
.bazel-cc_toolchain_bootstrap_example
An update to https://gist.github.com/rrbutani/f5d80af864e67d873ae4491111d9dcce.
With the necessary `--incompatible` flags (see `.bazelrc`), this setup works all
the way back to *at least* Bazel 3.7.
This cquery command (pardon the `grep`s – they just strip out some noisy tool deps
from the graph) should execute without any errors (no dependency cycles!) and yield
a graph that shows `fake_compiler` being built twice: once by the host toolchain as
needed by `stub_toolchain` and another time using `stub_toolchain`.
```
bazel cquery 'deps(//:fake_compiler)' \
--output=graph \
--transitions=full \
| grep -v '"@local_config_cc//:[^t][^o]' \
| grep -v 'xcode' \
| grep -v 'tvos' \
| grep -v '"@bazel_tools//tools/cpp:[^t]' \
| grep -v 'ios' \
| grep -v '"@bazel_tools//src' \
| grep -v '"@bazel_tools//tools/[^c]' \
| grep -v '@platforms//'
```
The notes in `BUILD` cover the only caveats I can think of: the toolchains to use
for bootstrapping have to be specified explicitly, and the failure mode is that
other toolchains silently get used instead. Other than that, this seems to work
pretty seamlessly.
build --incompatible_enable_cc_toolchain_resolution # *Still* needed in Bazel 5+ (https://github.com/bazelbuild/bazel/issues/7260)
build --incompatible_override_toolchain_transition # Not needed in Bazel 5+
# Uncomment for debugging:
# build --toolchain_resolution_debug=@bazel_tools//tools/cpp:toolchain_type
# Run with this config (i.e. `bazel run --config nobootstrap //:fake_compiler`)
# to build a target with the host toolchain instead of the bootstrapped "toolchain".
#
# Note that if you're just trying to run the bootstrapped compiler (and not the
# compiler built by the *bootstrapped compiler*) you can run
# `//:bootstrapped_fake_compiler`.
build:nobootstrap --extra_toolchains=@local_config_cc_toolchains//:all
load("@bazel_tools//tools/cpp:cc_toolchain_config_lib.bzl", "tool_path")
def _impl(ctx):
    """Creates the `CcToolchainConfigInfo` for the stub toolchain.

    The path of the first default output of the `compiler` attribute is used
    for the `gcc` and `cpp` tools; every other tool points at `/usr/bin/false`.

    Args:
        ctx: the rule context.

    Returns:
        The provider produced by `cc_common.create_cc_toolchain_config_info`.
    """
    compiler_path = ctx.attr.compiler[DefaultInfo].files.to_list()[0].path
    placeholder = "/usr/bin/false"

    # (tool name, tool path) pairs, in the order they are handed to Bazel.
    tool_locations = [
        ("gcc", compiler_path),
        ("cpp", compiler_path),
        ("ld", placeholder),
        ("nm", placeholder),
        ("objdump", placeholder),
        ("strip", placeholder),
        ("ar", placeholder),
        ("gcov", placeholder),
    ]

    # Left unset on purpose: features, action_configs, artifact_name_patterns,
    # cxx_builtin_include_directories, make_variables, builtin_sysroot and
    # cc_target_os.
    return cc_common.create_cc_toolchain_config_info(
        ctx = ctx,
        toolchain_identifier = "stub_toolchain",
        host_system_name = "local",
        target_system_name = "local",
        target_cpu = "k8",
        target_libc = "unknown",
        compiler = "clang",
        abi_version = "unknown",
        abi_libc_version = "unknown",
        tool_paths = [
            tool_path(name = tool, path = location)
            for tool, location in tool_locations
        ],
    )
# Rule producing the `CcToolchainConfigInfo` consumed by the stub `cc_toolchain`.
stub_toolchain_config = rule(
    implementation = _impl,
    attrs = {
        # The executable used as the toolchain's `gcc` and `cpp` tools. It is
        # built in the exec configuration. The implementation subscripts this
        # attribute unconditionally, so it is mandatory: omitting it is
        # reported as a missing attribute instead of a failure on `None`.
        "compiler": attr.label(
            mandatory = True,
            executable = True,
            cfg = "exec",
        ),
    },
    provides = [CcToolchainConfigInfo],
)
def _stringify(s):
    """Returns `s` as a string.

    Args:
        s: a string (returned unchanged) or a `Label` (converted with `str`).

    Returns:
        The string form of `s`. Any other type aborts with `fail`.
    """
    kind = type(s)
    if kind == type("string"):
        return s
    if kind == type(Label("//:a")):
        return str(s)
    fail("`{}` has an invalid type: {}".format(s, type(s)))
def _specific_extra_toolchains_impl(settings, attrs):
    """Replaces `--extra_toolchains` with the rule's `specific_toolchains`.

    Args:
        settings: the current values of the transition's declared inputs; they
            are not consulted.
        attrs: the attributes of the target the transition is attached to.

    Returns:
        A dict assigning the new `--extra_toolchains` value: the stringified
        `specific_toolchains` entries, or an empty list when the target has no
        such attribute.
    """
    if not hasattr(attrs, "specific_toolchains"):
        return {"//command_line_option:extra_toolchains": []}

    # Accept a single value as well as a list of values.
    requested = attrs.specific_toolchains
    if type(requested) != type([]):
        requested = [requested]

    # Debugging aid: print("transitioning {} to {}".format(settings["//command_line_option:extra_toolchains"], requested))
    return {
        "//command_line_option:extra_toolchains": [_stringify(entry) for entry in requested],
    }
# The single build setting this transition both reads and writes.
_EXTRA_TOOLCHAINS_OPTION = "//command_line_option:extra_toolchains"

# Transition that swaps the value of `--extra_toolchains` for the dependency it
# is attached to.
_specific_extra_toolchains = transition(
    implementation = _specific_extra_toolchains_impl,
    inputs = [_EXTRA_TOOLCHAINS_OPTION],
    outputs = [_EXTRA_TOOLCHAINS_OPTION],
)
def _with_toolchains_impl(ctx):
    """Exposes `src`, built under the attached transition, as this target's executable.

    A symlink named after this target is created that points at the first
    default output of `src`, and that symlink is returned as the executable.

    Args:
        ctx: the rule context.

    Returns:
        A list holding one `DefaultInfo` whose executable and only default
        output is the symlink, and whose runfiles are those of `src` plus the
        symlink's target.
    """

    # `src` carries a transition, so the attribute is read as a list.
    src = ctx.attr.src[0]
    actual = src[DefaultInfo].files.to_list()[0]

    f = ctx.actions.declare_file(ctx.attr.name)
    ctx.actions.symlink(output = f, target_file = actual, is_executable = True)

    # Forward the wrapped binary's runfiles (and the binary itself) so that
    # whatever it needs at run time is still staged next to the symlink.
    runfiles = ctx.runfiles(files = [actual]).merge(src[DefaultInfo].default_runfiles)

    return [DefaultInfo(
        executable = f,
        # Stated explicitly because consumers read `files.to_list()[0]`.
        files = depset([f]),
        runfiles = runfiles,
    )]
# Executable rule that builds `src` with `--extra_toolchains` replaced by
# `specific_toolchains` and re-exports the result under this target's name.
with_toolchains = rule(
    implementation = _with_toolchains_impl,
    attrs = {
        # The binary to build under the transition. The implementation indexes
        # this attribute unconditionally, so it is mandatory.
        "src": attr.label(
            mandatory = True,
            providers = [CcInfo],
            executable = True,
            cfg = _specific_extra_toolchains,
        ),
        # This is a `string_list` instead of a `label_list` because target
        # patterns (i.e. `//foo:all`) are permitted here
        # (`--extra_toolchains=...` will expand them).
        #
        # A provider restriction is not possible: `DeclaredToolchainInfo` is
        # what we want but isn't exposed in Starlark afaik, and
        # `platform_common.ToolchainInfo` is not right.
        "specific_toolchains": attr.string_list(),
        # Required by Bazel for rules that attach a Starlark transition.
        "_allowlist_function_transition": attr.label(
            default = "@bazel_tools//tools/allowlists/function_transition_allowlist",
        ),
    },
    executable = True,
)
###################### @local_config_cc toolchain wrapper #######################
# toolchain(
# name = "host_cc_toolchain",
# )
###################### Toolchain ######################
load("_internal.bzl", config = "stub_toolchain_config", "with_toolchains")
# Empty file group, used wherever the stub `cc_toolchain` needs no files.
filegroup(
    name = "empty",
)

# The stand-in "compiler" binary.
cc_binary(
    name = "fake_compiler",
    srcs = ["compiler.cc"],
)

# A second binary built from the same source.
cc_binary(
    name = "test",
    srcs = ["compiler.cc"],
)
# TODO: always transition to release mode, etc.
with_toolchains(
    name = "bootstrapped_fake_compiler",
    src = "fake_compiler",
    # The toolchains to bootstrap with have to be named explicitly.
    #
    # Originally this meant recreating the toolchain resolution logic in
    # `select`s here, because `local_config_cc` does not always emit all the
    # toolchains (e.g. "@local_config_cc//:cc-compiler-darwin_x86_64").
    #
    # An alternative is to simply fall back to using the host toolchains and
    # to *require* that users register the ultimate toolchains with
    # `--extra_toolchains` (or use them in transitions) to avoid cycles here.
    # This seems suboptimal.
    #
    # Update: `@local_config_cc_toolchains//:all` exists; all is well.
    specific_toolchains = [
        "@local_config_cc_toolchains//:all",
    ],
)
# An alternative way is to add constraints that require a "bootstrap platform"
# and to have the transition above add an extra execution platform and to then
# create (or wrap) a toolchain so that it also has this as an execution
# constraint.
#
# The benefit to this approach is that there is no possibility that the above
# transition will *not* result in an appropriate toolchain being used; unlike
# the above such an approach cannot silently fall back to a regular or other
# host toolchain if toolchain resolution fails to match the desired bootstrap
# toolchain. This is a desirable property because in the above approach we can
# only filter out `--extra_toolchains` registered toolchains and not toolchains
# registered with `native.register_toolchains`. In practice this is not such a
# big deal (you'll get cycles if the bootstrap toolchain isn't selected, most
# likely) but this still may be worth exploring in the future.
# Toolchain configuration whose compiler is the bootstrapped fake compiler.
config(
    name = "stub_toolchain_config",
    compiler = ":bootstrapped_fake_compiler",
)
cc_toolchain(
    name = "stub_toolchain_impl",
    toolchain_config = ":stub_toolchain_config",
    toolchain_identifier = "stub_toolchain",

    # The only real inputs: the bootstrapped compiler serves both the compile
    # and the link actions. (`:empty` and `:fake_compiler` were earlier
    # candidates for `compiler_files`.)
    compiler_files = ":bootstrapped_fake_compiler",
    linker_files = ":bootstrapped_fake_compiler",

    # Every remaining file group is empty.
    all_files = ":empty",
    ar_files = ":empty",
    as_files = ":empty",
    compiler_files_without_includes = ":empty",
    coverage_files = ":empty",
    dwp_files = ":empty",
    dynamic_runtime_lib = ":empty",
    libc_top = ":empty",
    objcopy_files = ":empty",
    static_runtime_lib = ":empty",
    strip_files = ":empty",

    # Not set: cpu, exec_transition_for_inputs, module_map, tags.
    supports_header_parsing = False,
    supports_param_files = False,
)
# Makes `:stub_toolchain_impl` available to C++ toolchain resolution.
toolchain(
    name = "stub_toolchain",
    toolchain = "//:stub_toolchain_impl",
    toolchain_type = "@bazel_tools//tools/cpp:toolchain_type",
    # Adjust as necessary, etc. No `target_settings` are set.
    exec_compatible_with = [
        "@platforms//os:macos",
        "@platforms//cpu:x86_64",
    ],
    visibility = ["//visibility:public"],
)
#include <iostream>
#include <fstream>
#include <sys/stat.h>
using namespace std;
// Reports whether `value` finishes with the exact character sequence `ending`.
// An empty `ending` matches every string, including the empty one.
inline bool ends_with(std::string const & value, std::string const & ending)
{
    const std::string::size_type tail_len = ending.size();
    if (value.size() < tail_len) {
        return false;
    }
    // Compare the last `tail_len` characters of `value` against `ending`.
    return value.compare(value.size() - tail_len, tail_len, ending) == 0;
}
// A very silly brittle bad stand in for a compiler.
//
// Every argument (including argv[0]) is echoed to stderr, then interpreted:
//   -MF <path>   dependency file to create
//   -c <path>    input file
//   -o <path>    output file
//   -<anything>  ignored
//   <name>.o     becomes the input and switches to "link" mode
// Paths that are never supplied default to /dev/null.
//
// In "compile" mode the input is wrapped in a bash script that prints it back
// out through a here-document; in "link" mode the input is copied through
// unchanged. The output path is then chmod'ed to 0775.
//
// Returns 0, or 1 when -MF/-c/-o is the last argument and so has no value.
int main(int argc, char** argv) {
    std::string inp_p("/dev/null"), out_p("/dev/null"), dep_p("/dev/null");
    bool is_linker = false;

    // The worst arg parser you've ever seen:
    for (int i = 0; i < argc; i++) {
        const std::string arg(argv[i]);
        std::cerr << "arg " << i << ": " << arg << std::endl;

        // Flags that consume the following argument as their value.
        std::string* value_slot = nullptr;
        if (arg == "-MF") { value_slot = &dep_p; }
        else if (arg == "-c") { value_slot = &inp_p; }
        else if (arg == "-o") { value_slot = &out_p; }

        if (value_slot != nullptr) {
            // argv[argc] is a null pointer; never build a string from it.
            if (i + 1 >= argc) {
                std::cerr << "missing value after " << arg << std::endl;
                return 1;
            }
            *value_slot = argv[++i];
        } else if (!arg.empty() && arg[0] == '-') {
            continue;
        } else if (ends_with(arg, ".o")) {
            std::cerr << i << " indicates linking" << std::endl;
            inp_p = arg;
            is_linker = true;
        }
    }

    std::ifstream inp;
    std::ofstream out, dep;
    inp.open(inp_p);
    out.open(out_p);
    dep.open(dep_p);

    std::cerr << std::endl;
    if (is_linker) { std::cerr << "LINKING: "; }
    std::cerr << inp_p << " -> " << out_p << std::endl;

    if (!is_linker) {
        out << "#!/usr/bin/env bash" << std::endl;
        out << "exec cat <<'SOME_LONG_EOF_STRING'" << std::endl;
    }

    // Inserting a stream buffer that yields no characters sets failbit on the
    // destination, which would silently drop everything written afterwards
    // (including the here-document terminator). Only copy when there is data.
    if (inp.peek() != std::ifstream::traits_type::eof()) {
        out << inp.rdbuf();
    }
    out << std::endl;

    if (!is_linker) {
        out << "SOME_LONG_EOF_STRING" << std::endl;
        dep << std::endl;
    }

    // Best effort: the result is ignored, as the output may be /dev/null.
    chmod(out_p.c_str(), 0775);
    return 0;
}
def register_toolchain():
    """Registers `//:stub_toolchain` through `native.register_toolchains`."""
    native.register_toolchains("//:stub_toolchain")
load("toolchain.bzl", "register_toolchain")
# Register the stub toolchain (`//:stub_toolchain`) for this workspace.
register_toolchain()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment