LLVM 23.0.0git
DTLTO.h
Go to the documentation of this file.
1//===- DTLTO.h - Integrated Distributed ThinLTO implementation ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// \file
10// Declarations for Integrated Distributed ThinLTO, including the DTLTO class
11// and the distribution driver. The implementation focuses on preparing input
12// files for distribution.
13//
14//===----------------------------------------------------------------------===//
15
16#ifndef LLVM_DTLTO_DTLTO_H
17#define LLVM_DTLTO_DTLTO_H
18
20#include "llvm/LTO/LTO.h"
23
24#include <functional>
25#include <vector>
26
27namespace llvm {
28namespace lto {
29
30/// Prepares inputs for Distributed ThinLTO so that backend compilations can use
31/// individual bitcode paths and consistent module IDs.
32///
33/// Each input must exist as an individual bitcode file on disk and be loadable
34/// via its ModuleID. Archive members and FatLTO objects do not satisfy that by
35/// default; this class writes bitcode out when needed and updates ModuleID.
36/// On Windows, module IDs are normalized to remove short 8.3 path components
37/// that are machine-local and break distribution; other normalization is left
38/// to DTLTO distributors.
39///
40/// Input files are kept until the pipeline has determined per-module ThinLTO
41/// participation. addInput() performs: (1) register the input; (2) on Windows,
42/// normalize module ID for standalone bitcode; (3) for thin archive members,
43/// set module ID to the on-disk member path; (4) for other archives and FatLTO,
44/// set module ID to a unique path and serialize content in
45/// serializeLTOInputs().
46class DTLTO : public LTO {
47 using Base = LTO;
48
49public:
50 LLVM_ABI DTLTO(Config Conf, unsigned ParallelCodeGenParallelismLevel,
52 bool EmitIndexFiles, bool EmitImportsFiles,
53 StringRef LinkerOutputFile, StringRef Distributor,
54 ArrayRef<StringRef> DistributorArgs, StringRef RemoteCompiler,
55 ArrayRef<StringRef> RemoteCompilerPrependArgs,
56 ArrayRef<StringRef> RemoteCompilerArgs,
57 AddBufferFn AddBufferArg, bool SaveTempsArg)
59 ParallelCodeGenParallelismLevel, LTOMode),
60 AddBuffer(AddBufferArg), SaveTemps(SaveTempsArg),
61 ShouldEmitIndexFiles(EmitIndexFiles),
62 ShouldEmitImportFiles(EmitImportsFiles), OnIndexWriteCb(OnWrite),
63 DistributorParams{Distributor, DistributorArgs,
64 RemoteCompiler, RemoteCompilerPrependArgs,
65 RemoteCompilerArgs, LinkerOutputFile} {
66 assert(!LinkerOutputFile.empty() && "expected a valid linker output file");
68 }
69
70 // Create an instance of the WriteIndexesThinBackend class.
73 "", true, nullptr, nullptr);
74 }
75
76 /// Add an input file and prepare it for distribution.
77 ///
78 /// This function performs the following tasks:
79 /// 1. Add the input file to the LTO object's list of input files.
80 /// 2. For individual bitcode file inputs on Windows only, overwrite the
81 /// module ID with a normalized path to remove short 8.3 form components.
82 /// 3. For thin archive members, overwrite the module ID with the path
83 /// (normalized on Windows) to the member file on disk.
84 /// 4. For archive members and FatLTO objects, overwrite the module ID with a
85 /// unique path (normalized on Windows) naming a file that will contain the
86 /// member content. The file is created and populated later (see
87 /// serializeLTOInputs()).
89 addInput(std::unique_ptr<InputFile> InputPtr) override;
90
91 /// Runs the DTLTO pipeline. This function calls the supplied AddStream
92 /// function to add native object files to the link.
93 ///
94 /// The Cache parameter is optional. If supplied, it will be used to cache
95 /// native object files and add them to the link.
96 ///
97 /// The client will receive at most one callback (via either AddStream or
98 /// Cache) for each task identifier.
99 LLVM_ABI virtual Error run(AddStreamFn AddStream,
100 FileCache Cache = {}) override;
101
102private:
103 /// DTLTO archives support.
104 ///
105 /// Save the contents of ThinLTO-enabled input files that must be serialized
106 /// for distribution, such as archive members and FatLTO objects, to
107 /// individual bitcode files named after the module ID.
108 ///
109 /// Must be called after all input files are added but before optimization
110 /// begins. If a file with that name already exists, it is likely a leftover
111 /// from a previously terminated linker process and can be safely overwritten.
112 LLVM_ABI Error serializeLTOInputs();
113
114 // Remove temporary files created to enable distribution.
115 LLVM_ABI void cleanup() override;
116
117public:
118 // Mutable and const accessors to the LTO configuration object.
119 Config &getConfig() { return Conf; }
120 const Config &getConfig() const { return Conf; }
121
122private:
123 // Bump allocator for saving updated module IDs.
124 BumpPtrAllocator PtrAlloc;
125 // String saver backed by PtrAlloc.
126 StringSaver Saver{PtrAlloc};
127
128 using SString = SmallString<128>;
129
130 // Callback used to add a pre-existing file.
131 AddBufferFn AddBuffer;
132 // Count of jobs that hit the cache.
133 std::atomic<size_t> CachedJobs{0};
134 // Normalized output directory from LinkerOutputFile.
135 SString LinkerOutputDir;
136 // Keep temporary files when true.
137 bool SaveTemps = false;
138
139 // Saves the content of Buffer to Path overwriting any existing file.
140 static Error save(StringRef Buffer, StringRef Path);
141
142public:
143 struct Job {
144 // Task index (combines RegularLTO parallel codegen offset with module
145 // index).
146 unsigned Task;
147 // Module identifier (bitcode path) for the ThinLTO module.
149 // Native object path.
151 // Per-module summary index path.
153 // Per-module imports list path.
155 // Bitcode files from which this module imports.
157 // Cache key from thin link.
158 std::string CacheKey;
159 // On cache miss, stream used to store the compiled object in the cache.
161 // Set when the object was already supplied via the cache callback.
162 bool Cached = false;
163 };
164
165private:
166 // Backend compilation jobs, one per module.
167 SmallVector<Job> Jobs;
168 // Task index offset for first ThinLTO job.
169 unsigned ThinLTOTaskOffset;
170 // Optional cache for native objects.
171 FileCache Cache;
172 // Keep summary index files when true.
173 bool ShouldEmitIndexFiles = false;
174 // Keep summary import files when true.
175 bool ShouldEmitImportFiles = false;
176 // Callback invoked when an index file is written.
177 IndexWriteCallback OnIndexWriteCb;
178
179 /// Probes the LTO cache for a compiled native object for the given job.
180 ///
181 /// If no cache is configured (Cache.isValid() is false), returns immediately
182 /// without modifying the job.
183 ///
184 /// Otherwise, looks up the cache using J.CacheKey. On a cache hit, the cached
185 /// object has already been passed to the linker via the Cache callback, so
186 /// J.Cached is set to true, CachedJobs is incremented, and the distributor
187 /// can skip this job. On a cache miss, the cache returns an AddStreamFn; we
188 /// store it in J.CacheAddStream for use when storing the freshly compiled
189 /// object after the distributor runs.
190 ///
191 /// \param J The job to check. Must have Task, CacheKey, and ModuleID set.
192 /// On return, J.Cached and J.CacheAddStream may be updated.
193 ///
194 /// \returns Error::success() on success, or an Error from the cache lookup.
195 Error checkCacheHit(Job &J);
196
197 /// Prepares a single DTLTO backend compilation job for a ThinLTO module.
198 ///
199 /// Called once per module during performCodegen(). This function:
200 ///
201 /// 1. Computes output paths for the native object and summary index files.
202 /// Both are placed in the linker output directory with names of the form
203 /// stem.Task.UID.native.o and stem.Task.UID.thinlto.bc, where stem is
204 /// derived from ModulePath.
205 ///
206 /// 2. Initializes the Job struct with Task, ModuleID (ModulePath), paths,
207 /// ImportsFilesList and CacheKey from thin link results, and default
208 /// values for CacheAddStream and Cached.
209 ///
210 /// 3. Calls checkCacheHit() to probe the cache. On a cache hit, J.Cached is
211 /// set and the cached object has already been passed to the linker; the
212 /// distributor will skip this job. On a cache miss, J.CacheAddStream is
213 /// set for later use when storing the compiled object.
214 ///
215 /// 4. Writes the per-module summary index to disk only on cache miss. The
216 /// remote compiler will read this via -fthinlto-index=.
217 ///
218 /// 5. Registers the job's temporary files for removal on abnormal process
219 /// exit when SaveTemps is false (only for files that will be created).
220 ///
221 /// \param ModulePath The module identifier (bitcode path) for the ThinLTO
222 /// module.
223 /// \param Task The task index (combines RegularLTO.ParallelCodeGen
224 /// parallelism offset with the module index).
225 ///
226 /// \returns Error::success() on success, or an Error from save() or
227 /// checkCacheHit().
228 Error prepareDtltoJob(StringRef ModulePath, unsigned Task);
229
230 /// Initializes DTLTO state and prepares a job for each ThinLTO module.
231 ///
232 /// Sets task offset, target triple, UID, and Jobs. For each module, calls
233 /// prepareDtltoJob() to assign output paths, check the cache, and write
234 /// summary index shards to disk when needed.
235 ///
236 /// \returns Error::success() on success, or an Error from prepareDtltoJob.
237 Error prepareDtltoJobs();
238
239 /// Runs the DTLTO code generation phase. Must be invoked after thinLink().
240 ///
241 /// Builds Clang options, emits a JSON manifest describing compilation jobs,
242 /// and invokes the distributor to compile ThinLTO modules remotely. Cache
243 /// hits are skipped; the distributor runs only when there are uncached jobs.
244 ///
245 /// \returns Error::success() on success, or an Error on manifest or
246 /// distributor failure.
247 Error performCodegen();
248
249 /// Adds compiled object files to the link for each non-cached job.
250 ///
251 /// Loads each native object from disk, then either writes it to the cache
252 /// (which adds it to the link via the cache callback) or passes it to
253 /// AddStreamFunc directly when caching is disabled.
254 ///
255 /// \returns Error::success() on success, or an Error if a file cannot be read
256 /// or a cache stream cannot be obtained.
257 Error addObjectFilesToLink();
258
259 // Determines if a file at the given path is a thin archive file.
260 //
261 // Uses a cache to avoid repeatedly reading the same file; reads only the
262 // header (magic bytes) to identify the archive type.
263 Expected<bool> isThinArchive(const StringRef ArchivePath);
264
265 // Unique ID for this link (process ID as string).
266 std::string UID;
267
268 // Input files registered for this link (same order as addInput).
269 std::vector<std::shared_ptr<lto::InputFile>> InputFiles;
270 // Cache for isThinArchive() results keyed by archive path.
271 StringMap<bool> ArchiveIsThinCache;
272 // Callback used by run() to add native objects to the link.
273 AddStreamFn AddStreamFunc = nullptr;
274 // Per-task summary index shards from the thin link (in-memory buffers).
275 std::vector<SmallString<0>> SummaryIndexFiles;
276 // Per-task imported bitcode paths from the thin link.
277 std::vector<std::vector<std::string>> ImportsFilesList;
278 // Per-task cache keys for incremental builds from the thin link.
279 std::vector<std::string> CacheKeysList;
280
281 /// Runs the DTLTO thin link phase, producing per-module summary indices,
282 /// import lists, and cache keys for distribution.
283 ///
284 /// This function configures a WriteIndexesThinBackend and invokes the base
285 /// LTO run, which performs the thin link. The thin link resolves cross-module
286 /// references and produces:
287 ///
288 /// - SummaryIndexFiles: per-module summary index shards (in-memory buffers)
289 /// - ImportsFilesList: per-module lists of imported bitcode files
290 /// - CacheKeysList: per-module cache keys for incremental builds
291 /// - ModuleNames: per-module identifiers
292 ///
293 /// The Config callbacks (GetSummaryIndexStreamFunc, GetCacheKeysListRefFunc,
294 /// GetImportsListRefFunc) are installed so the WriteIndexesThinBackend
295 /// populates these arrays. performCodegen() later uses them to prepare
296 /// backend jobs.
297 ///
298 /// \returns Error::success() if the thin link completes, or an Error from
299 /// Base::run().
300 Error performThinLink();
301
302 /// Derive a set of Clang options that will be shared/common for all DTLTO
303 /// backend compilations. We are intentionally minimal here as these options
304 /// must remain synchronized with the behavior of Clang. DTLTO does not
305 /// support all the features available with in-process LTO. More features are
306 /// expected to be added over time. Users can specify Clang options directly
307 /// if a feature is not supported. Note that explicitly specified options that
308 /// imply additional input or output file dependencies must be communicated to
309 /// the distribution system, potentially by setting extra options on the
310 /// distributor program.
311 void buildCommonRemoteCompilerOptions();
312
313public:
314 // Parameters and shared state for the DistributionDriver class.
319 ArrayRef<StringRef> DistributorArgsArg,
320 StringRef RemoteCompilerArg,
321 ArrayRef<StringRef> RemoteCompilerPrependArgsArg,
322 ArrayRef<StringRef> RemoteCompilerArgsArg,
323 StringRef LinkerOutputFileArg)
324 : LinkerOutputFile(LinkerOutputFileArg),
325 DistributorPath(DistributorArg), DistributorArgs(DistributorArgsArg),
326 RemoteCompiler(RemoteCompilerArg),
327 RemoteCompilerPrependArgs(RemoteCompilerPrependArgsArg),
328 RemoteCompilerArgs(RemoteCompilerArgsArg) {}
329
330 // Output linker file path.
332 // Path to the distributor executable.
334 // Arguments passed to the distributor.
336 // Compiler executable invoked by the distributor (e.g., Clang).
338 // Options prepended to remote compiler args.
340 // User-supplied options passed to remote compiler.
342
343 // Common Clang options for all compilation jobs.
345 // Input paths shared across compilation jobs.
347 // Target triple for compilations.
349 };
350
351private:
352 // Distributor configuration class instance.
353 DistributionDriverParams DistributorParams;
354
355 // Cleanup files list.
356 std::vector<std::string> CleanupList;
357
358 // Record a file for cleanup and register signal-time removal if requested.
359 void addToCleanup(StringRef Filename) {
360 CleanupList.push_back(Filename.str());
362 }
363};
364
// Message prefix for DTLTO backend compilation; presumably prepended to error
// text reported by the distribution driver (usage is not visible here).
// NOTE(review): this unnamed namespace is in a header, so every translation
// unit that includes it gets its own internal-linkage copy of BCError.
// Consider `inline constexpr` at namespace scope instead.
365namespace {
366constexpr StringRef BCError = "DTLTO backend compilation: ";
367}
368
370public:
373 ArrayRef<DTLTO::Job> JobsArg, bool SaveTempsArg,
374 std::function<void(StringRef)> AddToClenupArg)
375 : Params{ParamsArg}, SaveTemps{SaveTempsArg},
376 AddToCleanup{AddToClenupArg}, Jobs{JobsArg} {};
377
378private:
380 // Keep temporary files when true.
381 bool SaveTemps = false;
382 std::function<void(StringRef)> AddToCleanup;
384 SmallString<128> DistributorJsonFile;
385
386 // Generates a JSON file describing the compilations.
387 Error emitJson();
388 // Saves the JSON file to the filesystem.
389 Error saveJson();
390
391public:
392 /// Invokes the distributor to compile bitcode modules remotely.
393 ///
394 /// Runs the distributor with the
395 /// JSON manifest path; the distributor spawns remote compiler processes.
396 ///
397 /// \returns Error::success() on success, or an Error if the distributor
398 /// fails.
400};
401
402} // namespace lto
403} // namespace llvm
404
405#endif // LLVM_DTLTO_DTLTO_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
#define LLVM_ABI
Definition Compiler.h:213
static constexpr StringLiteral Filename
This file defines the SmallString class.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
Implements a dense probed hash-table based set.
Definition DenseSet.h:289
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
Tagged union holding either a T or a Error.
Definition Error.h:485
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
Definition StringMap.h:133
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
constexpr bool empty() const
Check if the string is empty.
Definition StringRef.h:141
Saves strings in the provided stable storage and returns a StringRef with a stable character pointer.
Definition StringSaver.h:22
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
const Config & getConfig() const
Definition DTLTO.h:120
virtual LLVM_ABI Error run(AddStreamFn AddStream, FileCache Cache={}) override
Runs the DTLTO pipeline.
Definition DTLTO.cpp:77
LLVM_ABI Expected< std::shared_ptr< InputFile > > addInput(std::unique_ptr< InputFile > InputPtr) override
Add an input file and prepare it for distribution.
LLVM_ABI DTLTO(Config Conf, unsigned ParallelCodeGenParallelismLevel, LTOKind LTOMode, IndexWriteCallback OnWrite, bool EmitIndexFiles, bool EmitImportsFiles, StringRef LinkerOutputFile, StringRef Distributor, ArrayRef< StringRef > DistributorArgs, StringRef RemoteCompiler, ArrayRef< StringRef > RemoteCompilerPrependArgs, ArrayRef< StringRef > RemoteCompilerArgs, AddBufferFn AddBufferArg, bool SaveTempsArg)
Definition DTLTO.h:50
Config & getConfig()
Definition DTLTO.h:119
static lto::ThinBackend writeIndexesBackendInstance()
Definition DTLTO.h:71
LLVM_ABI DistributionDriver(DTLTO::DistributionDriverParams &ParamsArg, ArrayRef< DTLTO::Job > JobsArg, bool SaveTempsArg, std::function< void(StringRef)> AddToClenupArg)
Definition DTLTO.h:372
Error operator()()
Invokes the distributor to compile bitcode modules remotely.
LLVM_ABI LTO(Config Conf, ThinBackend Backend={}, unsigned ParallelCodeGenParallelismLevel=1, LTOKind LTOMode=LTOK_Default)
Create an LTO object.
Definition LTO.cpp:688
virtual void cleanup()
Definition LTO.cpp:705
Config Conf
Definition LTO.h:459
LTOKind
Unified LTO modes.
Definition LTO.h:395
@ LTOK_UnifiedThin
ThinLTO, with Unified LTO enabled.
Definition LTO.h:403
LTOKind LTOMode
Definition LTO.h:620
std::function< void(const std::string &)> IndexWriteCallback
Definition LTO.h:244
LLVM_ABI ThinBackend createWriteIndexesThinBackend(ThreadPoolStrategy Parallelism, std::string OldPrefix, std::string NewPrefix, std::string NativeObjectPrefix, bool ShouldEmitImportsFiles, raw_fd_ostream *LinkedObjectsFile, IndexWriteCallback OnWrite)
This ThinBackend writes individual module indexes to files, instead of running the individual backend...
Definition LTO.cpp:2022
LLVM_ABI bool RemoveFileOnSignal(StringRef Filename, std::string *ErrMsg=nullptr)
This function registers signal handlers to ensure that if a signal gets delivered that the named file...
This is an optimization pass for GlobalISel generic memory operations.
ThreadPoolStrategy hardware_concurrency(unsigned ThreadCount=0)
Returns a default thread strategy where all available hardware resources are to be used,...
Definition Threading.h:190
std::function< void(unsigned Task, const Twine &ModuleName, std::unique_ptr< MemoryBuffer > MB)> AddBufferFn
This type defines the callback to add a pre-existing file (e.g.
Definition Caching.h:107
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1916
BumpPtrAllocatorImpl<> BumpPtrAllocator
The standard BumpPtrAllocator which just uses the default template parameters.
Definition Allocator.h:383
std::function< Expected< std::unique_ptr< CachedFileStream > >( unsigned Task, const Twine &ModuleName)> AddStreamFn
This type defines the callback to add a file that is generated on the fly.
Definition Caching.h:58
LLVM_ABI Error EmitImportsFiles(StringRef ModulePath, StringRef OutputFilename, const ModuleToSummariesForIndexTy &ModuleToSummariesForIndex)
Emit into OutputFilename the files module ModulePath will import from.
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:861
This type represents a file cache system that manages caching of files.
Definition Caching.h:84
LTO configuration.
Definition Config.h:43
ArrayRef< StringRef > DistributorArgs
Definition DTLTO.h:335
ArrayRef< StringRef > RemoteCompilerArgs
Definition DTLTO.h:341
SmallVector< StringRef, 0 > CodegenOptions
Definition DTLTO.h:344
DistributionDriverParams(StringRef DistributorArg, ArrayRef< StringRef > DistributorArgsArg, StringRef RemoteCompilerArg, ArrayRef< StringRef > RemoteCompilerPrependArgsArg, ArrayRef< StringRef > RemoteCompilerArgsArg, StringRef LinkerOutputFileArg)
Definition DTLTO.h:318
DenseSet< StringRef > CommonInputs
Definition DTLTO.h:346
ArrayRef< StringRef > RemoteCompilerPrependArgs
Definition DTLTO.h:339
StringRef SummaryIndexPath
Definition DTLTO.h:152
AddStreamFn CacheAddStream
Definition DTLTO.h:160
StringRef NativeObjectPath
Definition DTLTO.h:150
StringRef ModuleID
Definition DTLTO.h:148
ArrayRef< std::string > ImportsFilesList
Definition DTLTO.h:156
StringRef ImportsPath
Definition DTLTO.h:154
std::string CacheKey
Definition DTLTO.h:158
This type defines the behavior following the thin-link phase during ThinLTO.
Definition LTO.h:319