@@ -2503,26 +2503,77 @@ device kernel, the attribute is not ignored and it is propagated to the kernel.
2503
2503
[[intel::num_simd_work_items(N)]] void operator()() const {}
2504
2504
};
2505
2505
2506
- If the`` intel::reqd_work_group_size`` or ``cl::reqd_work_group_size``
2507
- attribute is specified on a declaration along with a
2508
- intel::num_simd_work_items attribute, the work group size attribute
2509
- argument (the first argument) must be evenly divisible by the argument specified
2510
- in the ``intel::num_simd_work_items`` attribute.
2506
+ If the ``reqd_work_group_size`` attribute is specified on a declaration along
2507
+ with ``num_simd_work_items``, the required work group size specified
2508
+ by the ``num_simd_work_items`` attribute must evenly divide the index that
2509
+ increments fastest in the ``reqd_work_group_size`` attribute.
2510
+
2511
+ The arguments to ``reqd_work_group_size`` are ordered based on which index
2512
+ increments the fastest. In OpenCL, the first argument is the index that
2513
+ increments the fastest, and in SYCL, the last argument is the index that
2514
+ increments the fastest.
2515
+
2516
+ In OpenCL, all three arguments are required.
2517
+
2518
+ In SYCL, the attribute accepts either one, two, or three arguments; in each
2519
+ form, the last (or only) argument is the index that increments fastest.
2520
+ The number of arguments passed to the attribute must match the dimensionality
2521
+ of the kernel the attribute is applied to.
2511
2522
2512
2523
.. code-block:: c++
2513
2524
2525
+ // Note, '64' is evenly divisible by '4'; in SYCL, the last
2526
+ // argument to the attribute is the one which increments fastest.
2514
2527
struct func {
2515
2528
[[intel::num_simd_work_items(4)]]
2516
- [[intel::reqd_work_group_size(64, 64 , 64)]]
2529
+ [[intel::reqd_work_group_size(7, 4, 64)]]
2517
2530
void operator()() const {}
2518
2531
};
2519
2532
2533
+ // Note, '8' is evenly divisible by '8'; in SYCL, the last
2534
+ // argument to the attribute is the one which increments fastest.
2520
2535
struct bar {
2521
- [[intel::reqd_work_group_size(64, 64, 64)]]
2536
+ [[intel::reqd_work_group_size(1, 1, 8)]]
2537
+ [[intel::num_simd_work_items(8)]]
2538
+ void operator()() const {}
2539
+ };
2540
+
2541
+ // Note, '10' is evenly divisible by '5'; in SYCL, the last
2542
+ // argument to the attribute is the one which increments fastest.
2543
+ [[cl::reqd_work_group_size(7, 5, 10)]]
2544
+ [[intel::num_simd_work_items(5)]] void fun2() {}
2545
+
2546
+ // Note, '8' is evenly divisible by '4'; in SYCL, the last
2547
+ // argument to the attribute is the one which increments fastest.
2548
+ [[intel::num_simd_work_items(4)]]
2549
+ [[cl::reqd_work_group_size(5, 4, 8)]] void fun3() {}
2550
+
2551
+ // Note, '8' is evenly divisible by '8'; in SYCL, the last
2552
+ // argument to the attribute is the one which increments fastest.
2553
+ struct func1 {
2554
+ [[intel::num_simd_work_items(8)]]
2555
+ [[cl::reqd_work_group_size(1, 1, 8)]]
2556
+ void operator()() const {}
2557
+ };
2558
+
2559
+ // Note, '8' is evenly divisible by '4'; in SYCL, the last
2560
+ // argument to the attribute is the one which increments fastest.
2561
+ struct bar1 {
2562
+ [[cl::reqd_work_group_size(7, 4, 8)]]
2522
2563
[[intel::num_simd_work_items(4)]]
2523
2564
void operator()() const {}
2524
2565
};
2525
2566
2567
+ // Note, '4' is evenly divisible by '2'; in SYCL, the last
2568
+ // argument to the attribute is the one which increments fastest.
2569
+ [[intel::num_simd_work_items(2)]]
2570
+ __attribute__((reqd_work_group_size(3, 2, 4))) void test();
2571
+
2572
+ // Note, '8' is evenly divisible by '2'; in SYCL, the last
2573
+ // argument to the attribute is the one which increments fastest.
2574
+ __attribute__((reqd_work_group_size(3, 2, 8)))
2575
+ [[intel::num_simd_work_items(2)]] void test();
2576
+
2526
2577
}];
2527
2578
}
2528
2579
@@ -2636,6 +2687,77 @@ In OpenCL C, this attribute is available in GNU spelling
2636
2687
2637
2688
__kernel __attribute__((reqd_work_group_size(8, 16, 32))) void test() {}
2638
2689
2690
+ The arguments to ``reqd_work_group_size`` are ordered based on which index
2691
+ increments the fastest. In OpenCL, the first argument is the index that
2692
+ increments the fastest, and in SYCL, the last argument is the index that
2693
+ increments the fastest.
2694
+
2695
+ In OpenCL, all three arguments are required.
2696
+
2697
+ In SYCL, the attribute accepts either one, two, or three arguments; in each
2698
+ form, the last (or only) argument is the index that increments fastest. The
2699
+ number of arguments passed to the attribute must match the dimensionality of
2700
+ the kernel the attribute is applied to.
2701
+
2702
+ If the ``reqd_work_group_size`` attribute is specified on a declaration along
2703
+ with ``num_simd_work_items``, the required work group size specified by
2704
+ ``num_simd_work_items`` must evenly divide the index that increments fastest
2705
+ in the ``reqd_work_group_size`` attribute.
2706
+
2707
+ .. code-block:: c++
2708
+
2709
+ // Note, '64' is evenly divisible by '4'; in SYCL, the last
2710
+ // argument to the attribute is the one which increments fastest.
2711
+ struct func {
2712
+ [[intel::num_simd_work_items(4)]]
2713
+ [[intel::reqd_work_group_size(7, 4, 64)]]
2714
+ void operator()() const {}
2715
+ };
2716
+
2717
+ // Note, '8' is evenly divisible by '8'; in SYCL, the last
2718
+ // argument to the attribute is the one which increments fastest.
2719
+ struct bar {
2720
+ [[intel::reqd_work_group_size(1, 1, 8)]]
2721
+ [[intel::num_simd_work_items(8)]]
2722
+ void operator()() const {}
2723
+ };
2724
+
2725
+ // Note, '10' is evenly divisible by '5'; in SYCL, the last
2726
+ // argument to the attribute is the one which increments fastest.
2727
+ [[cl::reqd_work_group_size(7, 5, 10)]]
2728
+ [[intel::num_simd_work_items(5)]] void fun2() {}
2729
+
2730
+ // Note, '8' is evenly divisible by '4'; in SYCL, the last
2731
+ // argument to the attribute is the one which increments fastest.
2732
+ [[intel::num_simd_work_items(4)]]
2733
+ [[cl::reqd_work_group_size(5, 4, 8)]] void fun3() {}
2734
+
2735
+ // Note, '8' is evenly divisible by '8'; in SYCL, the last
2736
+ // argument to the attribute is the one which increments fastest.
2737
+ struct func1 {
2738
+ [[intel::num_simd_work_items(8)]]
2739
+ [[cl::reqd_work_group_size(1, 1, 8)]]
2740
+ void operator()() const {}
2741
+ };
2742
+
2743
+ // Note, '8' is evenly divisible by '4'; in SYCL, the last
2744
+ // argument to the attribute is the one which increments fastest.
2745
+ struct bar1 {
2746
+ [[cl::reqd_work_group_size(7, 4, 8)]]
2747
+ [[intel::num_simd_work_items(4)]]
2748
+ void operator()() const {}
2749
+ };
2750
+
2751
+ // Note, '4' is evenly divisible by '2'; in SYCL, the last
2752
+ // argument to the attribute is the one which increments fastest.
2753
+ [[intel::num_simd_work_items(2)]]
2754
+ __attribute__((reqd_work_group_size(3, 2, 4))) void test();
2755
+
2756
+ // Note, '8' is evenly divisible by '2'; in SYCL, the last
2757
+ // argument to the attribute is the one which increments fastest.
2758
+ __attribute__((reqd_work_group_size(3, 2, 8)))
2759
+ [[intel::num_simd_work_items(2)]] void test();
2760
+
2639
2761
}];
2640
2762
}
2641
2763
0 commit comments