Skip to content

[AMDGPU] Create new directive .amdhsa_inst_pref_size #126622

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions llvm/docs/AMDGPUUsage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18213,6 +18213,9 @@ terminated by an ``.end_amdhsa_kernel`` directive.
:ref:`amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx12-table`.
``.amdhsa_shared_vgpr_count`` 0 GFX10-GFX11 Controls SHARED_VGPR_COUNT in
:ref:`amdgpu-amdhsa-compute_pgm_rsrc3-gfx10-gfx11-table`.
``.amdhsa_inst_pref_size`` 0 GFX11-GFX12 Controls INST_PREF_SIZE in
:ref:`amdgpu-amdhsa-compute_pgm_rsrc3-gfx10-gfx11-table` or
:ref:`amdgpu-amdhsa-compute_pgm_rsrc3-gfx12-table`.
``.amdhsa_exception_fp_ieee_invalid_op`` 0 GFX6-GFX12 Controls ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION in
:ref:`amdgpu-amdhsa-compute_pgm_rsrc2-gfx6-gfx12-table`.
``.amdhsa_exception_fp_denorm_src`` 0 GFX6-GFX12 Controls ENABLE_EXCEPTION_FP_DENORMAL_SOURCE in
Expand Down
12 changes: 12 additions & 0 deletions llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5876,6 +5876,18 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
ValRange);
} else if (ID == ".amdhsa_inst_pref_size") {
if (IVersion.Major < 11)
return Error(IDRange.Start, "directive requires gfx11+", IDRange);
if (IVersion.Major == 11) {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE, ExprVal,
ValRange);
} else {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE, ExprVal,
ValRange);
}
} else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
PARSE_BITS_ENTRY(
KD.compute_pgm_rsrc2,
Expand Down
8 changes: 4 additions & 4 deletions llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2233,15 +2233,15 @@ Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3(

// Bits [4-11].
if (isGFX11()) {
PRINT_PSEUDO_DIRECTIVE_COMMENT("INST_PREF_SIZE",
COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE);
PRINT_DIRECTIVE(".amdhsa_inst_pref_size",
COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE);
PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START);
PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END);
} else if (isGFX12Plus()) {
PRINT_PSEUDO_DIRECTIVE_COMMENT(
"INST_PREF_SIZE", COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE);
PRINT_DIRECTIVE(".amdhsa_inst_pref_size",
COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE);
} else {
CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED1,
"COMPUTE_PGM_RSRC3",
Expand Down
10 changes: 10 additions & 0 deletions llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -579,7 +579,17 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT,
".amdhsa_shared_vgpr_count");
}
if (IVersion.Major == 11) {
PrintField(KD.compute_pgm_rsrc3,
amdhsa::COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE_SHIFT,
amdhsa::COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE,
".amdhsa_inst_pref_size");
}
if (IVersion.Major >= 12) {
PrintField(KD.compute_pgm_rsrc3,
amdhsa::COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE_SHIFT,
amdhsa::COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE,
".amdhsa_inst_pref_size");
PrintField(KD.compute_pgm_rsrc1,
amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN_SHIFT,
amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN,
Expand Down
10 changes: 10 additions & 0 deletions llvm/test/MC/AMDGPU/hsa-diag-v4.s
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,16 @@
.amdhsa_shared_vgpr_count 15
.end_amdhsa_kernel

// GCN-LABEL: warning: test_amdhsa_inst_pref_size_invalid
// PREGFX10: error: directive requires gfx11+
// NONAMDHSA: error: unknown directive
.warning "test_amdhsa_inst_pref_size_invalid"
.amdhsa_kernel test_amdhsa_inst_pref_size_invalid
.amdhsa_next_free_vgpr 273
.amdhsa_next_free_sgpr 0
.amdhsa_inst_pref_size 15
.end_amdhsa_kernel

// GCN-LABEL: warning: test_next_free_vgpr_invalid
// AMDHSA: error: .amdgcn.next_free_{v,s}gpr symbols must be absolute expressions
// NONAMDHSA-NOT: error:
Expand Down
4 changes: 3 additions & 1 deletion llvm/test/MC/AMDGPU/hsa-gfx12-v4.s
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
// complete
// OBJDUMP-NEXT: 0040 01000000 01000000 08000000 00000000
// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 f00f0000
// OBJDUMP-NEXT: 0070 015021e4 1f0f007f 5e040000 00000000
// special_sgpr
// OBJDUMP-NEXT: 0080 00000000 00000000 00000000 00000000
Expand Down Expand Up @@ -120,6 +120,7 @@ disabled_user_sgpr:
.amdhsa_workgroup_processor_mode 1
.amdhsa_memory_ordered 1
.amdhsa_forward_progress 1
.amdhsa_inst_pref_size 255
.amdhsa_round_robin_scheduling 1
.amdhsa_exception_fp_ieee_invalid_op 1
.amdhsa_exception_fp_denorm_src 1
Expand Down Expand Up @@ -158,6 +159,7 @@ disabled_user_sgpr:
// ASM-NEXT: .amdhsa_workgroup_processor_mode 1
// ASM-NEXT: .amdhsa_memory_ordered 1
// ASM-NEXT: .amdhsa_forward_progress 1
// ASM-NEXT: .amdhsa_inst_pref_size 255
// ASM-NEXT: .amdhsa_round_robin_scheduling 1
// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1
// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1
Expand Down
2 changes: 2 additions & 0 deletions llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx11.s
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ expr_defined:
// ASM-NEXT: .amdhsa_memory_ordered (((((((((((((((((((1621884928|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&1073741824)>>30
// ASM-NEXT: .amdhsa_forward_progress (((((((((((((((((((1621884928|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&2147483648)>>31
// ASM-NEXT: .amdhsa_shared_vgpr_count 0
// ASM-NEXT: .amdhsa_inst_pref_size 0
// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op (((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&16777216)>>24
// ASM-NEXT: .amdhsa_exception_fp_denorm_src (((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&33554432)>>25
// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero (((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&67108864)>>26
Expand Down Expand Up @@ -180,6 +181,7 @@ expr_defined:
// ASM-NEXT: .amdhsa_memory_ordered 1
// ASM-NEXT: .amdhsa_forward_progress 1
// ASM-NEXT: .amdhsa_shared_vgpr_count 0
// ASM-NEXT: .amdhsa_inst_pref_size 0
// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1
// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1
// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1
Expand Down
8 changes: 6 additions & 2 deletions llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx12.s
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,12 @@
// expr_defined_later
// OBJDUMP-NEXT: 0000 2b000000 2c000000 00000000 00000000
// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 f0020000
// OBJDUMP-NEXT: 0030 05f02fe4 811f007f 000c0000 00000000
// expr_defined
// OBJDUMP-NEXT: 0040 2a000000 2b000000 00000000 00000000
// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 f0020000
// OBJDUMP-NEXT: 0070 05f02fe4 811f007f 000c0000 00000000

.text
Expand Down Expand Up @@ -53,6 +53,7 @@ expr_defined:
.amdhsa_workgroup_processor_mode defined_boolean
.amdhsa_memory_ordered defined_boolean
.amdhsa_forward_progress defined_boolean
.amdhsa_inst_pref_size defined_value+6
.amdhsa_exception_fp_ieee_invalid_op defined_boolean
.amdhsa_exception_fp_denorm_src defined_boolean
.amdhsa_exception_fp_ieee_div_zero defined_boolean
Expand Down Expand Up @@ -89,6 +90,7 @@ expr_defined:
.amdhsa_workgroup_processor_mode defined_boolean
.amdhsa_memory_ordered defined_boolean
.amdhsa_forward_progress defined_boolean
.amdhsa_inst_pref_size defined_value+6
.amdhsa_exception_fp_ieee_invalid_op defined_boolean
.amdhsa_exception_fp_denorm_src defined_boolean
.amdhsa_exception_fp_ieee_div_zero defined_boolean
Expand Down Expand Up @@ -132,6 +134,7 @@ expr_defined:
// ASM-NEXT: .amdhsa_workgroup_processor_mode (((((((((((((((((((((1611399168|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&536870912)>>29
// ASM-NEXT: .amdhsa_memory_ordered (((((((((((((((((((((1611399168|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&1073741824)>>30
// ASM-NEXT: .amdhsa_forward_progress (((((((((((((((((((((1611399168|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&2147483648)>>31
// ASM-NEXT: .amdhsa_inst_pref_size (((defined_value+6)<<4)&4080)>>4
// ASM-NEXT: .amdhsa_round_robin_scheduling (((((((((((((((((((((1611399168|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&2097152)>>21
// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op (((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&16777216)>>24
// ASM-NEXT: .amdhsa_exception_fp_denorm_src (((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&33554432)>>25
Expand Down Expand Up @@ -177,6 +180,7 @@ expr_defined:
// ASM-NEXT: .amdhsa_workgroup_processor_mode 1
// ASM-NEXT: .amdhsa_memory_ordered 1
// ASM-NEXT: .amdhsa_forward_progress 1
// ASM-NEXT: .amdhsa_inst_pref_size 47
// ASM-NEXT: .amdhsa_round_robin_scheduling 1
// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1
// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1
Expand Down
11 changes: 7 additions & 4 deletions llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx11.s
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
; CHECK-NEXT: .amdhsa_kernarg_size 0
; CHECK-NEXT: ; SHARED_VGPR_COUNT 0
; CHECK-NEXT: ; INST_PREF_SIZE 0
; CHECK-NEXT: .amdhsa_inst_pref_size 0
; CHECK-NEXT: ; TRAP_ON_START 0
; CHECK-NEXT: ; TRAP_ON_END 0
; CHECK-NEXT: ; IMAGE_OP 0
Expand Down Expand Up @@ -70,7 +70,7 @@
; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
; CHECK-NEXT: .amdhsa_kernarg_size 0
; CHECK-NEXT: .amdhsa_shared_vgpr_count 0
; CHECK-NEXT: ; INST_PREF_SIZE 0
; CHECK-NEXT: .amdhsa_inst_pref_size 0
; CHECK-NEXT: ; TRAP_ON_START 0
; CHECK-NEXT: ; TRAP_ON_END 0
; CHECK-NEXT: ; IMAGE_OP 0
Expand Down Expand Up @@ -114,6 +114,7 @@
.amdhsa_next_free_vgpr 32
.amdhsa_next_free_sgpr 32
.amdhsa_shared_vgpr_count 0
.amdhsa_inst_pref_size 0
.end_amdhsa_kernel

;--- 3.s
Expand All @@ -127,7 +128,7 @@
; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
; CHECK-NEXT: .amdhsa_kernarg_size 0
; CHECK-NEXT: .amdhsa_shared_vgpr_count 1
; CHECK-NEXT: ; INST_PREF_SIZE 0
; CHECK-NEXT: .amdhsa_inst_pref_size 63
; CHECK-NEXT: ; TRAP_ON_START 0
; CHECK-NEXT: ; TRAP_ON_END 0
; CHECK-NEXT: ; IMAGE_OP 0
Expand Down Expand Up @@ -171,6 +172,7 @@
.amdhsa_next_free_vgpr 32
.amdhsa_next_free_sgpr 32
.amdhsa_shared_vgpr_count 1
.amdhsa_inst_pref_size 63
.end_amdhsa_kernel

;--- 4.s
Expand All @@ -184,7 +186,7 @@
; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
; CHECK-NEXT: .amdhsa_kernarg_size 0
; CHECK-NEXT: .amdhsa_shared_vgpr_count 1
; CHECK-NEXT: ; INST_PREF_SIZE 0
; CHECK-NEXT: .amdhsa_inst_pref_size 63
; CHECK-NEXT: ; TRAP_ON_START 0
; CHECK-NEXT: ; TRAP_ON_END 0
; CHECK-NEXT: ; IMAGE_OP 0
Expand Down Expand Up @@ -228,5 +230,6 @@
.amdhsa_next_free_vgpr 32
.amdhsa_next_free_sgpr 32
.amdhsa_shared_vgpr_count 1
.amdhsa_inst_pref_size 63
.amdhsa_wavefront_size32 0
.end_amdhsa_kernel
5 changes: 3 additions & 2 deletions llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx12.s
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0
; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
; CHECK-NEXT: .amdhsa_kernarg_size 0
; CHECK-NEXT: ; INST_PREF_SIZE 0
; CHECK-NEXT: .amdhsa_inst_pref_size 0
; CHECK-NEXT: ; GLG_EN 0
; CHECK-NEXT: ; IMAGE_OP 0
; CHECK-NEXT: .amdhsa_next_free_vgpr 32
Expand Down Expand Up @@ -66,7 +66,7 @@
; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0
; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
; CHECK-NEXT: .amdhsa_kernarg_size 0
; CHECK-NEXT: ; INST_PREF_SIZE 0
; CHECK-NEXT: .amdhsa_inst_pref_size 255
; CHECK-NEXT: ; GLG_EN 0
; CHECK-NEXT: ; IMAGE_OP 0
; CHECK-NEXT: .amdhsa_next_free_vgpr 32
Expand Down Expand Up @@ -108,4 +108,5 @@
.amdhsa_next_free_vgpr 32
.amdhsa_next_free_sgpr 32
.amdhsa_wavefront_size32 0
.amdhsa_inst_pref_size 255
.end_amdhsa_kernel