diff --git a/clang/include/clang/Driver/Action.h b/clang/include/clang/Driver/Action.h
index 189bc2208c9ff..3621348f6a2ec 100644
--- a/clang/include/clang/Driver/Action.h
+++ b/clang/include/clang/Driver/Action.h
@@ -738,7 +738,7 @@ class SYCLPostLinkJobAction : public JobAction {
void anchor() override;
public:
- // The tempfiletable management relies on a shadowing the main file type by
+ // The tempfiletable management relies on shadowing the main file type by
// types::TY_Tempfiletable. The problem of shadowing is it prevents its
// integration with clang tools that relies on the file type to properly set
// args.
diff --git a/sycl/doc/CompilerAndRuntimeDesign.md b/sycl/doc/CompilerAndRuntimeDesign.md
index e98233eb73951..f55c5df905aa2 100644
--- a/sycl/doc/CompilerAndRuntimeDesign.md
+++ b/sycl/doc/CompilerAndRuntimeDesign.md
@@ -548,13 +548,15 @@ down to the NVPTX Back End. All produced bitcode depends on two libraries,
During the "PTX target processing" in the device linking step [Device
code post-link step](#device-code-post-link-step), the llvm bitcode
-objects for the CUDA target are linked together alongside
-`libspirv-nvptx64--nvidiacl.bc` and `libdevice.bc`, compiled to PTX
-using the NVPTX backend and assembled into a cubin using the `ptxas`
-tool (part of the CUDA SDK). The PTX file and cubin are assembled
-together using `fatbinary` to produce a CUDA fatbin. The CUDA fatbin
-then replaces the llvm bitcode file in the file table generated by
-`sycl-post-link`. The resulting table is passed to the offload wrapper tool.
+objects for the CUDA target are linked together during the common
+`llvm-link` step and then split using the `sycl-post-link` tool.
+For each temporary bitcode file, clang is invoked to link it with
+`libspirv-nvptx64--nvidiacl.bc` and `libdevice.bc` and compile the resulting
+module to PTX using the NVPTX backend. The resulting PTX file is assembled
+into a cubin using the `ptxas` tool (part of the CUDA SDK). The PTX file and
+cubin are assembled together using `fatbinary` to produce a CUDA fatbin.
+The produced CUDA fatbins then replace the llvm bitcode files in the file table generated
+by `sycl-post-link`. The resulting table is passed to the offload wrapper tool.

diff --git a/sycl/doc/images/DeviceLinkAndWrap.svg b/sycl/doc/images/DeviceLinkAndWrap.svg
index 10ea7704f79d8..1234b368fc18b 100644
--- a/sycl/doc/images/DeviceLinkAndWrap.svg
+++ b/sycl/doc/images/DeviceLinkAndWrap.svg
@@ -9,7 +9,7 @@
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
sodipodi:docname="DeviceLinkAndWrap.svg"
- inkscape:version="1.0.2-2 (e86c870879, 2021-01-15)"
+ inkscape:version="0.92.3 (2405546, 2018-03-11)"
id="svg8"
version="1.1"
viewBox="0 0 205.79753 221.03191"
@@ -1827,6 +1827,78 @@
x2="517.42999"
y2="349.29999"
spreadMethod="pad" />
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
+ Device LLVM IR
+ id="tspan897">Device LLVM IR
+
-
@@ -2143,7 +2207,7 @@
id="path1013"
inkscape:connector-curvature="0" />
@@ -2158,7 +2222,7 @@
id="path1019"
inkscape:connector-curvature="0" />
@@ -2173,7 +2237,7 @@
id="path1025"
inkscape:connector-curvature="0" />
@@ -2239,12 +2303,12 @@
@@ -2334,11 +2398,13 @@
+ file
+ id="tspan1119">file
+
+ -
+ id="tspan1131">-
+
+ table
+ id="tspan1143">table
+
+ -
+ id="tspan1155">-
+
+ tform
+ id="tspan1167">tform
+
+ extract “Code”
+ id="tspan1179">extract “Code”
+
+ AOT backend
+ id="tspan1243">AOT backend
+
+
@@ -2546,11 +2629,13 @@
+ PTX target
+ id="tspan1307">PTX target
+
+ processing
+ id="tspan1319">processing
+
+ clang
+ id="tspan1383">clang
+
+ -
+ id="tspan1395">-
+
+ offload
+ id="tspan1407">offload
+
+ -
+ id="tspan1419">-
+
+ wrapper
+ id="tspan1431">wrapper
+
+ s
+ id="tspan1495">s
+
+ ycl
+ id="tspan1507">ycl
+
+ -
+ id="tspan1519">-
+
+ post
+ id="tspan1531">post
+
+ -
+ id="tspan1543">-
+
+ link
+ id="tspan1555">link
+
+ file
+ id="tspan1619">file
+
+ -
+ id="tspan1631">-
+
+ table
+ id="tspan1643">table
+
+ -
+ id="tspan1655">-
+
+ tform
+ id="tspan1667">tform
+
+ replace “Code”
+ id="tspan1679">replace “Code”
+
+ llvm
+ id="tspan1743">llvm
+
+ -
+ id="tspan1755">-
+
+ spirv
+ id="tspan1767">spirv
+
-
@@ -3533,11 +3655,13 @@
+ Wrapper object
+ id="tspan2127">Wrapper object
+
+ Device code
+ id="tspan2175">Device code
+
+ (f r o m l
+ id="tspan2191">(f r o m l
+
+ l
+ id="tspan2203">l
+
+ v m
+ id="tspan2215">v m
+
+ -
+ id="tspan2227">-
+
+ l i n k)
+ id="tspan2239">l i n k)
+
+ (t o h o s t l i n k e r)
+ id="tspan2251">(t o h o s t l i n k e r)
+
-
@@ -3726,11 +3859,13 @@
+ TY_tempfiletable
+ id="tspan2265">TY_tempfiletable
+
+ TY_tempfilelist
-
-
-
-
- s
-
-
-
-
- ingle file
-
-
-
-
-
- PTX target compilation
+ id="tspan2277">TY_tempfilelist
+
+ Split code
+ id="tspan2343">Split code
+
+ file
+ id="tspan1119-5">file
+
+ -
+ id="tspan1131-3">-
+
+ table
+ id="tspan1143-6">table
+
+ -
+ id="tspan1155-0">-
+
+ tform
+ id="tspan1167-8">tform
+
copy “Code”
+ All targets
+ y="0">All targets
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/sycl/doc/images/DevicePTXProcessing.svg b/sycl/doc/images/DevicePTXProcessing.svg
index a639d2906a6c8..df690ec5fb08b 100644
--- a/sycl/doc/images/DevicePTXProcessing.svg
+++ b/sycl/doc/images/DevicePTXProcessing.svg
@@ -9,7 +9,7 @@
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
sodipodi:docname="DevicePTXProcessing.svg"
- inkscape:version="1.0.2-2 (e86c870879, 2021-01-15)"
+ inkscape:version="0.92.3 (2405546, 2018-03-11)"
id="svg8"
version="1.1"
viewBox="0 0 205.79753 221.03191"
@@ -2204,16 +2204,16 @@
fit-margin-left="0"
fit-margin-top="0"
inkscape:window-maximized="1"
- inkscape:window-y="-8"
- inkscape:window-x="1912"
- inkscape:window-height="1017"
+ inkscape:window-y="0"
+ inkscape:window-x="1920"
+ inkscape:window-height="1163"
inkscape:window-width="1920"
showgrid="false"
inkscape:document-rotation="0"
inkscape:current-layer="layer1"
inkscape:document-units="mm"
- inkscape:cy="361.85228"
- inkscape:cx="371.91241"
+ inkscape:cy="330.42371"
+ inkscape:cx="228.34098"
inkscape:zoom="1.4"
inkscape:pageshadow="2"
inkscape:pageopacity="0.0"
@@ -2318,14 +2318,16 @@
d="m 125.31238,20.80355 0.32455,-1.291166 1.29117,-0.324556 z" />
+ File table
+ y="16.988504">File table
+
+ Clang
+ y="84.249756">Clang
+
@@ -2491,11 +2495,13 @@
+ clang
+ x="0 7.7220001 15.444 23.166 30.94416">clang
+
+ -
+ x="0">-
+
+ offload
+ x="0 7.7922001 15.5142 23.2362 30.9582 38.736359 46.528561">offload
+
+ -
+ x="0">-
+
+ wrapper
+ x="0 7.7922001 15.5142 23.2362 30.9582 38.736359 46.458359">wrapper
+
+ PTX target processing
+ y="60.97049">PTX target processing
+
@@ -2681,11 +2697,13 @@
+ Wrapper object
+ x="0 12.11652 16.79184 23.517 30.831841 38.146679 45.138599 50.038559 53.141399 60.540482 67.911484 71.267036 78.258957 84.099602">Wrapper object
+
+ Device code
+ x="0 8.6493597 15.484464 21.841393 25.076113 30.968927 37.930607 41.151264 46.917503 54.329231 61.712833">Device code
+
+ (from sycl-post-link)
+ y="17.476978">(from sycl-post-link)
+
@@ -2792,11 +2814,13 @@
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:4.953px;font-family:Calibri;-inkscape-font-specification:'Calibri, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;writing-mode:lr-tb;text-anchor:start;fill:#404040;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.352778"
id="text899-9"
x="73.031509"
- y="68.195061">
+ libspirv.bc
+ sodipodi:role="line">libspirv.bc
+
+ libdevice.bc
+ sodipodi:role="line">libdevice.bc
+
+ ptxas
+ y="113.47669">ptxas
+
@@ -2936,11 +2964,13 @@
id="text1309-7"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:4.953px;font-family:Consolas;-inkscape-font-specification:'Consolas, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;writing-mode:lr-tb;text-anchor:start;fill:#ffffff;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.352778"
x="102.06758"
- y="137.37424">
+ fatbin
+ y="137.37424">fatbin
+
+ ptx
+ id="tspan3794">ptx
+
+ cubin
+ id="tspan3916">cubin
+
+ LLVM IR
+ y="73.327454">LLVM IR
+
+ CUDA fatbin
+ id="tspan2303">CUDA fatbin
+
+ (to host linker)
+ y="216.68318">(to host linker)
+
+ (nvptx backend)
- (Single row)
+ y="88.973877">(nvptx backend)
+
+ file-table-tform
+ y="33.349266">file-table-tform
+
+ (Copy "Code")
+ y="37.806171">(Copy "Code")
+
+ LLVM IR
+ y="47.484673">LLVM IR
+
+ file-table-tform
+ y="161.4454">file-table-tform
+
+ (Replace "Code")
+ y="165.9023">(Replace "Code")
+
+ File table
+ id="tspan2303-1">File table
+