Skip to content

Commit d062d77

Browse files
committed
Merge remote-tracking branch 'intel/sycl' into optimize-build
2 parents bf57926 + 64e92cb commit d062d77

File tree

4,750 files changed

+112626
-57027
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

4,750 files changed

+112626
-57027
lines changed

.github/workflows/sycl_linux_build_and_test.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ on:
4040
build_configure_extra_args:
4141
type: string
4242
required: false
43-
default: "--hip --hip-amd-arch=gfx906 --cuda"
43+
default: "--hip --cuda"
4444
build_artifact_suffix:
4545
type: string
4646
required: true
@@ -86,7 +86,7 @@ jobs:
8686
\"build_cache_root\":\"/__w/\",
8787
\"build_cache_suffix\":\"default\",
8888
\"build_cache_size\":\"2G\",
89-
\"build_configure_extra_args\":\"--hip --hip-amd-arch=gfx906 --cuda\",
89+
\"build_configure_extra_args\":\"--hip --cuda\",
9090
\"build_artifact_suffix\":\"default\",
9191
\"build_upload_artifact\":\"false\",
9292
\"intel_drivers_image\":\"ghcr.io/intel/llvm/ubuntu2004_intel_drivers:latest\",

.github/workflows/sycl_nightly.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,5 +70,5 @@ jobs:
7070
build_cache_root: "/__w/"
7171
build_cache_suffix: new_pm
7272
build_artifact_suffix: new_pm
73-
build_configure_extra_args: '--hip --hip-amd-arch=gfx906 --cuda --cmake-opt=-DLLVM_ENABLE_NEW_PASS_MANAGER=ON'
73+
build_configure_extra_args: '--hip --cuda --cmake-opt=-DLLVM_ENABLE_NEW_PASS_MANAGER=ON'
7474
lts_config: "hip_amdgpu;ocl_x64"

.github/workflows/sycl_stale_issues.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,3 +17,5 @@ jobs:
1717
exempt-issue-labels: 'confirmed,hip,cuda,enhancement,help wanted,upstream'
1818
stale-issue-label: 'stale'
1919
exempt-all-issue-assignees: true
20+
operations-per-run: 200
21+

.github/workflows/sycl_windows_build_and_test.yml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ jobs:
1717
run: |
1818
choco install -y cuda --version 11.6.0.51123
1919
choco install -y ninja
20+
choco install -y sccache --version 0.2.15
2021
refreshenv
2122
echo CUDA_PATH=%CUDA_PATH%
2223
echo CUDA_PATH=%CUDA_PATH% >> %GITHUB_ENV%
@@ -49,6 +50,8 @@ jobs:
4950
--cmake-opt="-DCMAKE_C_COMPILER=cl" ^
5051
--cmake-opt="-DCMAKE_CXX_COMPILER=cl" ^
5152
--cmake-opt="-DCMAKE_INSTALL_PREFIX=%GITHUB_WORKSPACE%\install" ^
53+
--cmake-opt="-DCMAKE_CXX_COMPILER_LAUNCHER=sccache" ^
54+
--cmake-opt="-DCMAKE_C_COMPILER_LAUNCHER=sccache" ^
5255
--cuda
5356
- name: Build
5457
shell: cmd
@@ -64,4 +67,4 @@ jobs:
6467
uses: actions/upload-artifact@v2
6568
with:
6669
name: sycl_windows_default
67-
path: install/**/*
70+
path: install/**/*

README.md

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,3 +28,86 @@ See [LICENSE.txt](sycl/LICENSE.TXT) for details.
2828
See [CONTRIBUTING.md](CONTRIBUTING.md) for details.
2929

3030
*\*Other names and brands may be claimed as the property of others.*
31+
32+
This is an example workflow and configuration to get and build the LLVM source:
33+
34+
1. Checkout LLVM (including related sub-projects like Clang):
35+
36+
* ``git clone https://github.com/llvm/llvm-project.git``
37+
38+
* Or, on Windows, ``git clone --config core.autocrlf=false
39+
https://github.com/llvm/llvm-project.git``
40+
41+
2. Configure and build LLVM and Clang:
42+
43+
* ``cd llvm-project``
44+
45+
* ``cmake -S llvm -B build -G <generator> [options]``
46+
47+
Some common build system generators are:
48+
49+
* ``Ninja`` --- for generating [Ninja](https://ninja-build.org)
50+
build files. Most LLVM developers use Ninja.
51+
* ``Unix Makefiles`` --- for generating make-compatible parallel makefiles.
52+
* ``Visual Studio`` --- for generating Visual Studio projects and
53+
solutions.
54+
* ``Xcode`` --- for generating Xcode projects.
55+
56+
Some common options:
57+
58+
* ``-DLLVM_ENABLE_PROJECTS='...'`` and ``-DLLVM_ENABLE_RUNTIMES='...'`` ---
59+
semicolon-separated list of the LLVM sub-projects and runtimes you'd like to
60+
additionally build. ``LLVM_ENABLE_PROJECTS`` can include any of: clang,
61+
clang-tools-extra, cross-project-tests, flang, libc, libclc, lld, lldb,
62+
mlir, openmp, polly, or pstl. ``LLVM_ENABLE_RUNTIMES`` can include any of
63+
libcxx, libcxxabi, libunwind, compiler-rt, libc or openmp. Some runtime
64+
projects can be specified either in ``LLVM_ENABLE_PROJECTS`` or in
65+
``LLVM_ENABLE_RUNTIMES``.
66+
67+
For example, to build LLVM, Clang, libcxx, and libcxxabi, use
68+
``-DLLVM_ENABLE_PROJECTS="clang" -DLLVM_ENABLE_RUNTIMES="libcxx;libcxxabi"``.
69+
70+
* ``-DCMAKE_INSTALL_PREFIX=directory`` --- Specify for *directory* the full
71+
path name of where you want the LLVM tools and libraries to be installed
72+
(default ``/usr/local``). Be careful if you install runtime libraries: if
73+
your system uses those provided by LLVM (like libc++ or libc++abi), you
74+
must not overwrite your system's copy of those libraries, since that
75+
could render your system unusable. In general, using something like
76+
``/usr`` is not advised, but ``/usr/local`` is fine.
77+
78+
* ``-DCMAKE_BUILD_TYPE=type`` --- Valid options for *type* are Debug,
79+
Release, RelWithDebInfo, and MinSizeRel. Default is Debug.
80+
81+
* ``-DLLVM_ENABLE_ASSERTIONS=On`` --- Compile with assertion checks enabled
82+
(default is Yes for Debug builds, No for all other build types).
83+
84+
* ``cmake --build build [-- [options] <target>]`` or your build system specified above
85+
directly.
86+
87+
* The default target (i.e. ``ninja`` or ``make``) will build all of LLVM.
88+
89+
* The ``check-all`` target (i.e. ``ninja check-all``) will run the
90+
regression tests to ensure everything is in working order.
91+
92+
* CMake will generate targets for each tool and library, and most
93+
LLVM sub-projects generate their own ``check-<project>`` target.
94+
95+
* Running a serial build will be **slow**. To improve speed, try running a
96+
parallel build. That's done by default in Ninja; for ``make``, use the option
97+
``-j NNN``, where ``NNN`` is the number of parallel jobs, e.g. the number of
98+
CPUs you have.
99+
100+
* For more information see [CMake](https://llvm.org/docs/CMake.html)
101+
102+
Consult the
103+
[Getting Started with LLVM](https://llvm.org/docs/GettingStarted.html#getting-started-with-llvm)
104+
page for detailed information on configuring and compiling LLVM. You can visit
105+
[Directory Layout](https://llvm.org/docs/GettingStarted.html#directory-layout)
106+
to learn about the layout of the source code tree.
107+
108+
## Getting in touch
109+
110+
Join the [LLVM Discourse forums](https://discourse.llvm.org/), the [Discord chat](https://discord.gg/xS7Z362) or the #llvm IRC channel on [OFTC](https://oftc.net/).
111+
112+
The LLVM project has adopted a [code of conduct](https://llvm.org/docs/CodeOfConduct.html) for
113+
participants in all modes of communication within the project.

bolt/docs/Heatmaps.md

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,10 @@ $ perf record -e cycles:u -j any,u [-p PID|-a] -- sleep <interval>
2323
Note that at the moment running with LBR (`-j any,u` or `-b`) is
2424
a requirement.
2525

26-
Once the run is complete, and `perf.data` is generated, run BOLT in
27-
a heatmap mode:
26+
Once the run is complete, and `perf.data` is generated, run llvm-bolt-heatmap:
2827

2928
```bash
30-
$ llvm-bolt heatmap -p perf.data <executable>
29+
$ llvm-bolt-heatmap -p perf.data <executable>
3130
```
3231

3332
By default the heatmap will be dumped to *stdout*. You can change it

bolt/docs/OptimizingClang.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ Collecting accurate profile requires running `perf` on a hardware that
4141
implements taken branch sampling (`-b/-j` flag). For that reason, it may not be possible to
4242
collect the accurate profile in a virtualized environment, e.g. in the cloud.
4343
We do support regular sampling profiles, but the performance
44-
improvements are expected to be more modest.
44+
improvements are expected to be more modest.
4545

4646
```bash
4747
$ mkdir ${TOPLEV}/stage3
@@ -211,7 +211,8 @@ $ cd ${TOPLEV}/stage1
211211
$ cmake -G Ninja ${TOPLEV}/llvm-project/llvm -DLLVM_TARGETS_TO_BUILD=X86 \
212212
-DCMAKE_BUILD_TYPE=Release \
213213
-DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ -DCMAKE_ASM_COMPILER=gcc \
214-
-DLLVM_ENABLE_PROJECTS="clang;lld;compiler-rt" \
214+
-DLLVM_ENABLE_PROJECTS="clang;lld" \
215+
-DLLVM_ENABLE_RUNTIMES="compiler-rt" \
215216
-DCOMPILER_RT_BUILD_SANITIZERS=OFF -DCOMPILER_RT_BUILD_XRAY=OFF \
216217
-DCOMPILER_RT_BUILD_LIBFUZZER=OFF \
217218
-DCMAKE_INSTALL_PREFIX=${TOPLEV}/stage1/install

bolt/include/bolt/Passes/BinaryPasses.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,16 @@ class ShortenInstructions : public BinaryFunctionPass {
295295

296296
/// Perform simple peephole optimizations.
297297
class Peepholes : public BinaryFunctionPass {
298+
public:
299+
enum PeepholeOpts : char {
300+
PEEP_NONE = 0x0,
301+
PEEP_DOUBLE_JUMPS = 0x2,
302+
PEEP_TAILCALL_TRAPS = 0x4,
303+
PEEP_USELESS_BRANCHES = 0x8,
304+
PEEP_ALL = 0xf
305+
};
306+
307+
private:
298308
uint64_t NumDoubleJumps{0};
299309
uint64_t TailCallTraps{0};
300310
uint64_t NumUselessCondBranches{0};

bolt/include/bolt/Passes/LivenessAnalysis.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
#include "bolt/Passes/DataflowAnalysis.h"
1313
#include "bolt/Passes/RegAnalysis.h"
14+
#include "llvm/MC/MCRegisterInfo.h"
1415
#include "llvm/Support/CommandLine.h"
1516

1617
namespace opts {

bolt/include/bolt/Passes/ReachingDefOrUse.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include "bolt/Passes/DataflowAnalysis.h"
1313
#include "bolt/Passes/RegAnalysis.h"
1414
#include "llvm/ADT/Optional.h"
15+
#include "llvm/MC/MCRegisterInfo.h"
1516
#include "llvm/Support/CommandLine.h"
1617
#include "llvm/Support/Timer.h"
1718

0 commit comments

Comments
 (0)