Skip to content

Commit 618adc7

Browse files
authored
[RISCV] Support instruction sizes up to 176-bits in disassembler. (#90371)
We don't have any such instructions defined yet, but this ensures that we can still read the correct number of bytes when disassembling. This should better match GNU objdump behavior.
1 parent 99df06a commit 618adc7

File tree

2 files changed

+66
-5
lines changed

2 files changed

+66
-5
lines changed

llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp

Lines changed: 37 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -656,12 +656,44 @@ DecodeStatus RISCVDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
656656
ArrayRef<uint8_t> Bytes,
657657
uint64_t Address,
658658
raw_ostream &CS) const {
659-
// TODO: This will need modification when supporting instruction set
660-
// extensions with instructions > 32-bits (up to 176 bits wide).
659+
// It's a 16 bit instruction if bit 0 and 1 are not 0b11.
660+
if ((Bytes[0] & 0b11) != 0b11)
661+
return getInstruction16(MI, Size, Bytes, Address, CS);
661662

662-
// It's a 32 bit instruction if bit 0 and 1 are 1.
663-
if ((Bytes[0] & 0x3) == 0x3)
663+
// It's a 32 bit instruction if bits 1:0 are 0b11 (checked above) and bits 4:2
664+
// are not 0b111.
665+
if ((Bytes[0] & 0b1'1100) != 0b1'1100)
664666
return getInstruction32(MI, Size, Bytes, Address, CS);
665667

666-
return getInstruction16(MI, Size, Bytes, Address, CS);
668+
// 48-bit instructions are encoded as 0bxx011111.
669+
if ((Bytes[0] & 0b11'1111) == 0b01'1111) {
670+
Size = Bytes.size() >= 6 ? 6 : 0;
671+
return MCDisassembler::Fail;
672+
}
673+
674+
// 64-bit instructions are encoded as 0bx0111111.
675+
if ((Bytes[0] & 0b111'1111) == 0b011'1111) {
676+
Size = Bytes.size() >= 8 ? 8 : 0;
677+
return MCDisassembler::Fail;
678+
}
679+
680+
// Remaining cases need to check a second byte.
681+
if (Bytes.size() < 2) {
682+
Size = 0;
683+
return MCDisassembler::Fail;
684+
}
685+
686+
// 80-bit through 176-bit instructions are encoded as 0bxnnnxxxx_x1111111.
687+
// Where the number of bits is (80 + (nnn * 16)) for nnn != 0b111.
688+
unsigned nnn = (Bytes[1] >> 4) & 0b111;
689+
if (nnn != 0b111) {
690+
Size = 10 + (nnn * 2);
691+
if (Bytes.size() < Size)
692+
Size = 0;
693+
return MCDisassembler::Fail;
694+
}
695+
696+
// Remaining encodings are reserved for > 176-bit instructions.
697+
Size = 0;
698+
return MCDisassembler::Fail;
667699
}
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# RUN: llvm-mc -filetype=obj -triple riscv32 < %s \
2+
# RUN: | llvm-objdump -d - | FileCheck %s
3+
4+
# CHECK: 011f 4523 8967 <unknown>
5+
.byte 0x1f, 0x01, 0x23, 0x45, 0x67, 0x89
6+
7+
# CHECK: 4523013f cdab8967 <unknown>
8+
.byte 0x3f, 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd
9+
10+
# CHECK: 007f 4523 8967 cdab feef <unknown>
11+
.byte 0x7f, 0x00, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe
12+
13+
# CHECK: 4523107f cdab8967 badcfeef <unknown>
14+
.byte 0x7f, 0x10, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba
15+
16+
# CHECK: 207f 4523 8967 cdab feef badc 7698 <unknown>
17+
.byte 0x7f, 0x20, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba, 0x98, 0x76
18+
19+
# CHECK: 4523307f cdab8967 badcfeef 32547698 <unknown>
20+
.byte 0x7f, 0x30, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32
21+
22+
# CHECK: 407f 4523 8967 cdab feef badc 7698 3254 1210 <unknown>
23+
.byte 0x7f, 0x40, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10, 0x12
24+
25+
# CHECK: 4523507f cdab8967 badcfeef 32547698 56341210 <unknown>
26+
.byte 0x7f, 0x50, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10, 0x12, 0x34, 0x56
27+
28+
# CHECK: 607f 4523 8967 cdab feef badc 7698 3254 1210 5634 9a78 <unknown>
29+
.byte 0x7f, 0x60, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10, 0x12, 0x34, 0x56, 0x78, 0x9a

0 commit comments

Comments
 (0)