cuda-phases.cu source code [clang_source_code/test/Driver/cuda-phases.cu]

1	// Tests the phases generated for a CUDA offloading target for different
2	// combinations of:
3	// - Number of gpu architectures;
4	// - Host/device-only compilation;
5	// - User-requested final phase - binary or assembly.
6
7	// REQUIRES: clang-driver
8	// REQUIRES: powerpc-registered-target
9	// REQUIRES: nvptx-registered-target
10	// REQUIRES: amdgpu-registered-target
11	//
12	// Test single gpu architecture with complete compilation.
13	//
14	// Test CUDA NVPTX phases.
15	// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
16	// RUN: --cuda-gpu-arch=sm_30 %s 2>&1 \
17	// RUN: \| FileCheck -check-prefixes=BIN,BIN_NV %s
18	//
19	// Test HIP AMDGPU -fgpu-rdc phases.
20	// RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
21	// RUN: --cuda-gpu-arch=gfx803 -fgpu-rdc %s 2>&1 \
22	// RUN: \| FileCheck -check-prefixes=BIN,BIN_AMD,BIN_AMD_RDC %s
23	//
24	// Test HIP AMDGPU -fno-gpu-rdc phases (default).
25	// RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
26	// RUN: --cuda-gpu-arch=gfx803 %s 2>&1 \
27	// RUN: \| FileCheck -check-prefixes=BIN,BIN_AMD,BIN_AMD_NRDC %s
28	//
29	// BIN_NV-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (host-[[T]])
30	// BIN_AMD-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:hip]], (host-[[T]])
31	// BIN-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
32	// BIN-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
33	// BIN_NV-DAG: [[P3:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (device-[[T]], [[ARCH:sm_30]])
34	// BIN_AMD-DAG: [[P3:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (device-[[T]], [[ARCH:gfx803]])
35	// BIN-DAG: [[P4:[0-9]+]]: preprocessor, {[[P3]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
36	// BIN-DAG: [[P5:[0-9]+]]: compiler, {[[P4]]}, ir, (device-[[T]], [[ARCH]])
37	// BIN_NV-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, assembler, (device-[[T]], [[ARCH]])
38	// BIN_NV-DAG: [[P7:[0-9]+]]: assembler, {[[P6]]}, object, (device-[[T]], [[ARCH]])
39	// BIN_NV-DAG: [[P8:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda]]:[[ARCH]])" {[[P7]]}, object
40	// BIN_NV-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH]])" {[[P6]]}, assembler
41	// BIN_NV-DAG: [[P10:[0-9]+]]: linker, {[[P8]], [[P9]]}, cuda-fatbin, (device-[[T]])
42	// BIN_NV-DAG: [[P11:[0-9]+]]: offload, "host-[[T]] (powerpc64le-ibm-linux-gnu)" {[[P2]]}, "device-[[T]] ([[TRIPLE]])" {[[P10]]}, ir
43	// BIN_NV-DAG: [[P12:[0-9]+]]: backend, {[[P11]]}, assembler, (host-[[T]])
44	// BIN_AMD_RDC-DAG: [[P12:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
45	// BIN_AMD_NRDC-DAG: [[P6:[0-9]+]]: linker, {[[P5]]}, image, (device-hip, [[ARCH]])
46	// BIN_AMD_NRDC-DAG: [[P7:[0-9]+]]: offload, "device-hip (amdgcn-amd-amdhsa:[[ARCH]])" {[[P6]]}, image
47	// BIN_AMD_NRDC-DAG: [[P8:[0-9]+]]: linker, {[[P7]]}, hip-fatbin, (device-hip)
48	// BIN_AMD_NRDC-DAG: [[P11:[0-9]+]]: offload, "host-hip (powerpc64le-ibm-linux-gnu)" {[[P2]]}, "device-hip (amdgcn-amd-amdhsa)" {[[P8]]}, ir
49	// BIN_AMD_NRDC-DAG: [[P12:[0-9]+]]: backend, {[[P11]]}, assembler, (host-[[T]])
50	// BIN-DAG: [[P13:[0-9]+]]: assembler, {[[P12]]}, object, (host-[[T]])
51	// BIN-DAG: [[P14:[0-9]+]]: linker, {[[P13]]}, image, (host-[[T]])
52	// BIN_AMD_RDC-DAG: [[P15:[0-9]+]]: linker, {[[P5]]}, image, (device-[[T]], [[ARCH]])
53	// BIN_AMD_RDC-DAG: [[P16:[0-9]+]]: offload, "host-[[T]] (powerpc64le-ibm-linux-gnu)" {[[P14]]},
54	// BIN_AMD_RDC-DAG-SAME: "device-[[T]] ([[TRIPLE:amdgcn-amd-amdhsa]]:[[ARCH]])" {[[P15]]}, object
55
56	//
57	// Test single gpu architecture up to the assemble phase.
58	//
59	// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
60	// RUN: --cuda-gpu-arch=sm_30 %s -S 2>&1 \
61	// RUN: \| FileCheck -check-prefixes=ASM,ASM_NV %s
62	// RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
63	// RUN: --cuda-gpu-arch=gfx803 -fgpu-rdc %s -S 2>&1 \
64	// RUN: \| FileCheck -check-prefixes=ASM,ASM_AMD %s
65	// RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
66	// RUN: --cuda-gpu-arch=gfx803 -fcuda-rdc %s -S 2>&1 \
67	// RUN: \| FileCheck -check-prefixes=ASM,ASM_AMD %s
68	// ASM_NV-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (device-[[T]], [[ARCH:sm_30]])
69	// ASM_AMD-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
70	// ASM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
71	// ASM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
72	// ASM_NV-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]])
73	// ASM_NV-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda\|amdgcn-amd-amdhsa]]:[[ARCH]])" {[[P3]]}, assembler
74	// ASM-DAG: [[P5:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (host-[[T]])
75	// ASM-DAG: [[P6:[0-9]+]]: preprocessor, {[[P5]]}, [[T]]-cpp-output, (host-[[T]])
76	// ASM-DAG: [[P7:[0-9]+]]: compiler, {[[P6]]}, ir, (host-[[T]])
77	// ASM-DAG: [[P8:[0-9]+]]: backend, {[[P7]]}, assembler, (host-[[T]])
78
79	//
80	// Test two gpu architectures with complete compilation.
81	//
82	// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
83	// RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s 2>&1 \
84	// RUN: \| FileCheck -check-prefixes=BIN2,BIN2_NV %s
85	// RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
86	// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 -fgpu-rdc %s 2>&1 \
87	// RUN: \| FileCheck -check-prefixes=BIN2,BIN2_AMD %s
88	// BIN2_NV-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (host-[[T]])
89	// BIN2_AMD-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:hip]], (host-[[T]])
90	// BIN2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
91	// BIN2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
92	// BIN2-DAG: [[P3:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (device-[[T]], [[ARCH1:sm_30\|gfx803]])
93	// BIN2-DAG: [[P4:[0-9]+]]: preprocessor, {[[P3]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH1]])
94	// BIN2-DAG: [[P5:[0-9]+]]: compiler, {[[P4]]}, ir, (device-[[T]], [[ARCH1]])
95	// BIN2_NV-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, assembler, (device-[[T]], [[ARCH1]])
96	// BIN2_NV-DAG: [[P7:[0-9]+]]: assembler, {[[P6]]}, object, (device-[[T]], [[ARCH1]])
97	// BIN2_NV-DAG: [[P8:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda]]:[[ARCH1]])" {[[P7]]}, object
98	// BIN2_NV-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH1]])" {[[P6]]}, assembler
99	// BIN2-DAG: [[P10:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (device-[[T]], [[ARCH2:sm_35\|gfx900]])
100	// BIN2-DAG: [[P11:[0-9]+]]: preprocessor, {[[P10]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]])
101	// BIN2-DAG: [[P12:[0-9]+]]: compiler, {[[P11]]}, ir, (device-[[T]], [[ARCH2]])
102	// BIN2_NV-DAG: [[P13:[0-9]+]]: backend, {[[P12]]}, assembler, (device-[[T]], [[ARCH2]])
103	// BIN2_NV-DAG: [[P14:[0-9]+]]: assembler, {[[P13]]}, object, (device-[[T]], [[ARCH2]])
104	// BIN2_NV-DAG: [[P15:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH2]])" {[[P14]]}, object
105	// BIN2_NV-DAG: [[P16:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH2]])" {[[P13]]}, assembler
106	// BIN2_NV-DAG: [[P17:[0-9]+]]: linker, {[[P8]], [[P9]], [[P15]], [[P16]]}, cuda-fatbin, (device-[[T]])
107	// BIN2_NV-DAG: [[P18:[0-9]+]]: offload, "host-[[T]] (powerpc64le-ibm-linux-gnu)" {[[P2]]}, "device-[[T]] ([[TRIPLE]])" {[[P17]]}, ir
108	// BIN2_NV-DAG: [[P19:[0-9]+]]: backend, {[[P18]]}, assembler, (host-[[T]])
109	// BIN2_AMD-DAG: [[P19:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
110	// BIN2-DAG: [[P20:[0-9]+]]: assembler, {[[P19]]}, object, (host-[[T]])
111	// BIN2-DAG: [[P21:[0-9]+]]: linker, {[[P20]]}, image, (host-[[T]])
112	// BIN2_AMD-DAG: [[P22:[0-9]+]]: linker, {[[P5]]}, image, (device-[[T]], [[ARCH1]])
113	// BIN2_AMD-DAG: [[P23:[0-9]+]]: linker, {[[P12]]}, image, (device-[[T]], [[ARCH2]])
114	// BIN2_AMD-DAG: [[P24:[0-9]+]]: offload, "host-[[T]] (powerpc64le-ibm-linux-gnu)" {[[P21]]},
115	// BIN2_AMD-DAG-SAME: "device-[[T]] ([[TRIPLE:amdgcn-amd-amdhsa]]:[[ARCH1]])" {[[P22]]},
116	// BIN2_AMD-DAG-SAME: "device-[[T]] ([[TRIPLE:amdgcn-amd-amdhsa]]:[[ARCH2]])" {[[P23]]}, object
117
118	//
119	// Test two gpu architecturess up to the assemble phase.
120	//
121	// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
122	// RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s -S 2>&1 \
123	// RUN: \| FileCheck -check-prefixes=ASM2,ASM2_NV %s
124	// RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
125	// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 -fgpu-rdc %s -S 2>&1 \
126	// RUN: \| FileCheck -check-prefixes=ASM2,ASM2_AMD %s
127	// ASM2_NV-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (device-[[T]], [[ARCH1:sm_30]])
128	// ASM2_AMD-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:hip]], (device-[[T]], [[ARCH1:gfx803]])
129	// ASM2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH1]])
130	// ASM2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH1]])
131	// ASM2_NV-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH1]])
132	// ASM2_NV-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda\|amdgcn-amd-amdhsa]]:[[ARCH1]])" {[[P3]]}, assembler
133	// ASM2-DAG: [[P5:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (device-[[T]], [[ARCH2:sm_35\|gfx900]])
134	// ASM2-DAG: [[P6:[0-9]+]]: preprocessor, {[[P5]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]])
135	// ASM2-DAG: [[P7:[0-9]+]]: compiler, {[[P6]]}, ir, (device-[[T]], [[ARCH2]])
136	// ASM2_NV-DAG: [[P8:[0-9]+]]: backend, {[[P7]]}, assembler, (device-[[T]], [[ARCH2]])
137	// ASM2_NV-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH2]])" {[[P8]]}, assembler
138	// ASM2-DAG: [[P10:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (host-[[T]])
139	// ASM2-DAG: [[P11:[0-9]+]]: preprocessor, {[[P10]]}, [[T]]-cpp-output, (host-[[T]])
140	// ASM2-DAG: [[P12:[0-9]+]]: compiler, {[[P11]]}, ir, (host-[[T]])
141	// ASM2-DAG: [[P13:[0-9]+]]: backend, {[[P12]]}, assembler, (host-[[T]])
142
143	//
144	// Test single gpu architecture with complete compilation in host-only
145	// compilation mode.
146	//
147	// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
148	// RUN: --cuda-gpu-arch=sm_30 %s --cuda-host-only 2>&1 \
149	// RUN: \| FileCheck -check-prefixes=HBIN,HBIN_NV %s
150	// RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
151	// RUN: --cuda-gpu-arch=gfx803 %s --cuda-host-only 2>&1 \
152	// RUN: \| FileCheck -check-prefixes=HBIN,HBIN_AMD %s
153	// HBIN_NV-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (host-[[T]])
154	// HBIN_AMD-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:hip]], (host-[[T]])
155	// HBIN-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
156	// HBIN-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
157	// HBIN-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
158	// HBIN-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (host-[[T]])
159	// HBIN-DAG: [[P5:[0-9]+]]: linker, {[[P4]]}, image, (host-[[T]])
160	// HBIN-NOT: device
161	//
162	// Test single gpu architecture up to the assemble phase in host-only
163	// compilation mode.
164	//
165	// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
166	// RUN: --cuda-gpu-arch=sm_30 %s --cuda-host-only -S 2>&1 \
167	// RUN: \| FileCheck -check-prefixes=HASM,HASM_NV %s
168	// RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
169	// RUN: --cuda-gpu-arch=gfx803 %s --cuda-host-only -S 2>&1 \
170	// RUN: \| FileCheck -check-prefixes=HASM,HASM_AMD %s
171	// HASM_NV-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (host-[[T]])
172	// HASM_AMD-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:hip]], (host-[[T]])
173	// HASM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
174	// HASM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
175	// HASM-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
176	// HASM-NOT: device
177
178	//
179	// Test two gpu architectures with complete compilation in host-only
180	// compilation mode.
181	//
182	// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
183	// RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-host-only 2>&1 \
184	// RUN: \| FileCheck -check-prefixes=HBIN2,HBIN2_NV %s
185	// RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
186	// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s --cuda-host-only 2>&1 \
187	// RUN: \| FileCheck -check-prefixes=HBIN2,HBIN2_AMD %s
188	// HBIN2_NV-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (host-[[T]])
189	// HBIN2_AMD-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:hip]], (host-[[T]])
190	// HBIN2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
191	// HBIN2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
192	// HBIN2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
193	// HBIN2-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (host-[[T]])
194	// HBIN2-DAG: [[P5:[0-9]+]]: linker, {[[P4]]}, image, (host-[[T]])
195	// HBIN2-NOT: device
196
197	//
198	// Test two gpu architectures up to the assemble phase in host-only
199	// compilation mode.
200	//
201	// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
202	// RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-host-only -S \
203	// RUN: 2>&1 \| FileCheck -check-prefixes=HASM2,HASM2_NV %s
204	// RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
205	// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s --cuda-host-only -S \
206	// RUN: 2>&1 \| FileCheck -check-prefixes=HASM2,HASM2_AMD %s
207	// HASM2_NV-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (host-[[T]])
208	// HASM2_AMD-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:hip]], (host-[[T]])
209	// HASM2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
210	// HASM2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
211	// HASM2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
212	// HASM2-NOT: device
213
214	//
215	// Test single gpu architecture with complete compilation in device-only
216	// compilation mode.
217	//
218	// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
219	// RUN: --cuda-gpu-arch=sm_30 %s --cuda-device-only 2>&1 \
220	// RUN: \| FileCheck -check-prefixes=DBIN,DBIN_NV %s
221	// RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
222	// RUN: --cuda-gpu-arch=gfx803 %s --cuda-device-only 2>&1 \
223	// RUN: \| FileCheck -check-prefixes=DBIN,DBIN_AMD %s
224	// DBIN_NV-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (device-[[T]], [[ARCH:sm_30]])
225	// DBIN_AMD-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
226	// DBIN-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
227	// DBIN-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
228	// DBIN_NV-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]])
229	// DBIN_NV-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (device-[[T]], [[ARCH]])
230	// DBIN_NV-DAG: [[P5:[0-9]+]]: offload, "device-[[T]] (nvptx64-nvidia-cuda:[[ARCH]])" {[[P4]]}, object
231	// DBIN-NOT: host
232	//
233	// Test single gpu architecture up to the assemble phase in device-only
234	// compilation mode.
235	//
236	// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
237	// RUN: --cuda-gpu-arch=sm_30 %s --cuda-device-only -S 2>&1 \
238	// RUN: \| FileCheck -check-prefixes=DASM,DASM_NV %s
239	// RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
240	// RUN: --cuda-gpu-arch=gfx803 %s --cuda-device-only -S 2>&1 \
241	// RUN: \| FileCheck -check-prefixes=DASM,DASM_AMD %s
242	// DASM_NV-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (device-[[T]], [[ARCH:sm_30]])
243	// DASM_AMD-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
244	// DASM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
245	// DASM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
246	// DASM_NV-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]])
247	// DASM_NV-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda\|amdgcn-amd-amdhsa]]:[[ARCH]])" {[[P3]]}, assembler
248	// DASM-NOT: host
249
250	//
251	// Test two gpu architectures with complete compilation in device-only
252	// compilation mode.
253	//
254	// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
255	// RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-device-only 2>&1 \
256	// RUN: \| FileCheck -check-prefixes=DBIN2,DBIN2_NV %s
257	// RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
258	// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s --cuda-device-only \
259	// RUN: 2>&1 \| FileCheck -check-prefixes=DBIN2,DBIN2_AMD %s
260	// DBIN2_NV-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (device-[[T]], [[ARCH:sm_30]])
261	// DBIN2_AMD-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
262	// DBIN2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
263	// DBIN2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
264	// DBIN2_NV-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]])
265	// DBIN2_NV-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (device-[[T]], [[ARCH]])
266	// DBIN2_NV-DAG: [[P5:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda]]:[[ARCH]])" {[[P4]]}, object
267	// DBIN2-DAG: [[P6:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (device-[[T]], [[ARCH2:sm_35\|gfx900]])
268	// DBIN2-DAG: [[P7:[0-9]+]]: preprocessor, {[[P6]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]])
269	// DBIN2-DAG: [[P8:[0-9]+]]: compiler, {[[P7]]}, ir, (device-[[T]], [[ARCH2]])
270	// DBIN2_NV-DAG: [[P9:[0-9]+]]: backend, {[[P8]]}, assembler, (device-[[T]], [[ARCH2]])
271	// DBIN2_NV-DAG: [[P10:[0-9]+]]: assembler, {[[P9]]}, object, (device-[[T]], [[ARCH2]])
272	// DBIN2_NV-DAG: [[P11:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH2]])" {[[P10]]}, object
273	// DBIN2-NOT: host
274	//
275	// Test two gpu architectures up to the assemble phase in device-only
276	// compilation mode.
277	//
278	// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
279	// RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-device-only -S \
280	// RUN: 2>&1 \| FileCheck -check-prefixes=DASM2,DASM2_NV %s
281	// RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu \
282	// RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
283	// RUN: --cuda-device-only -S 2>&1 \
284	// RUN: \| FileCheck -check-prefixes=DASM2,DASM2_AMD %s
285	// DASM2_NV-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (device-[[T]], [[ARCH:sm_30]])
286	// DASM2_AMD-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
287	// DASM2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
288	// DASM2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
289	// DASM2_NV-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]])
290	// DASM2_NV-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda\|amdgcn-amd-amdhsa]]:[[ARCH]])" {[[P3]]}, assembler
291	// DASM2-DAG: [[P5:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (device-[[T]], [[ARCH2:sm_35\|gfx900]])
292	// DASM2-DAG: [[P6:[0-9]+]]: preprocessor, {[[P5]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]])
293	// DASM2-DAG: [[P7:[0-9]+]]: compiler, {[[P6]]}, ir, (device-[[T]], [[ARCH2]])
294	// DASM2_NV-DAG: [[P8:[0-9]+]]: backend, {[[P7]]}, assembler, (device-[[T]], [[ARCH2]])
295	// DASM2_NV-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH2]])" {[[P8]]}, assembler
296	// DASM2-NOT: host
297

Clang Project