From 4a2f7b6f880003325415f5f545dcac94c28ac016 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 27 Apr 2024 18:39:09 -0700 Subject: [PATCH 1/4] [RISCV] Support instruction sizes up to 176-bits in disassembler. --- .../RISCV/Disassembler/RISCVDisassembler.cpp | 42 ++++++++++++++++--- llvm/test/MC/RISCV/large-instructions.s | 29 +++++++++++++ 2 files changed, 66 insertions(+), 5 deletions(-) create mode 100644 llvm/test/MC/RISCV/large-instructions.s diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp index 7ca20190731ad..c43bda8521fbf 100644 --- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp +++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp @@ -656,12 +656,44 @@ DecodeStatus RISCVDisassembler::getInstruction(MCInst &MI, uint64_t &Size, ArrayRef Bytes, uint64_t Address, raw_ostream &CS) const { - // TODO: This will need modification when supporting instruction set - // extensions with instructions > 32-bits (up to 176 bits wide). + // It's a 16 bit instruction if bit 0 and 1 are not 0x3. + if ((Bytes[0] & 0x3) != 0x3) + return getInstruction16(MI, Size, Bytes, Address, CS); - // It's a 32 bit instruction if bit 0 and 1 are 1. - if ((Bytes[0] & 0x3) == 0x3) + // It's a 32 bit instruction if bit 1:0 are 0x3(checked above) and bits 4:2 + // are not 0x3. + if ((Bytes[0] & 0x1f) != 0x1f) return getInstruction32(MI, Size, Bytes, Address, CS); - return getInstruction16(MI, Size, Bytes, Address, CS); + // 48-bit instructions are encoded as 0bxx011111. + if ((Bytes[0] & 0x3f) == 0x1f) { + Size = Bytes.size() >= 6 ? 6 : 0; + return MCDisassembler::Fail; + } + + // 64-bit instructions are encoded as 0bx0111111. + if ((Bytes[0] & 0x7f) == 0x3f) { + Size = Bytes.size() >= 8 ? 8 : 0; + return MCDisassembler::Fail; + } + + // Need to read a second byte. 
+ if (Bytes.size() < 2) { + Size = 0; + return MCDisassembler::Fail; + } + + // 80-bit through 176-bit instructions are encoded as 0bxnnnxxxx_x1111111. + // Where number of bits is (80 + (nnn * 16)) for nnn != 0b111. + unsigned nnn = (Bytes[1] >> 4) & 0x7; + if (nnn != 0x7) { + Size = 10 + (nnn * 2); + if (Bytes.size() < Size) + Size = 0; + return MCDisassembler::Fail; + } + + // Remaining encodings are reserved for > 176-bit instructions. + Size = 0; + return MCDisassembler::Fail; } diff --git a/llvm/test/MC/RISCV/large-instructions.s b/llvm/test/MC/RISCV/large-instructions.s new file mode 100644 index 0000000000000..b50dbde17d380 --- /dev/null +++ b/llvm/test/MC/RISCV/large-instructions.s @@ -0,0 +1,29 @@ +# RUN: llvm-mc -filetype=obj -triple riscv32 < %s \ +# RUN: | llvm-objdump -d - | FileCheck %s + +# CHECK: 011f 4523 8967 +.byte 0x1f, 0x01, 0x23, 0x45, 0x67, 0x89 + +# CHECK: 4523013f cdab8967 +.byte 0x3f, 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd + +# CHECK: 007f 4523 8967 cdab feef +.byte 0x7f, 0x00, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe + +# CHECK: 4523107f cdab8967 badcfeef +.byte 0x7f, 0x10, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba + +# CHECK: 207f 4523 8967 cdab feef badc 7698 +.byte 0x7f, 0x20, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba, 0x98, 0x76 + +# CHECK: 4523307f cdab8967 badcfeef 32547698 +.byte 0x7f, 0x30, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32 + +# CHECK: 407f 4523 8967 cdab feef badc 7698 3254 1210 +.byte 0x7f, 0x40, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10, 0x12 + +# CHECK: 4523507f cdab8967 badcfeef 32547698 56341210 +.byte 0x7f, 0x50, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10, 0x12, 0x34, 0x56 + +# CHECK: 607f 4523 8967 cdab feef badc 7698 3254 1210 5634 9a78 +.byte 0x7f, 0x60, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 
0x10, 0x12, 0x34, 0x56, 0x78, 0x9a From 765cd8828475fc4a7c330cc5769357fa6a9b7909 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 28 Apr 2024 18:58:38 -0700 Subject: [PATCH 2/4] fixup! Use binary constants --- .../RISCV/Disassembler/RISCVDisassembler.cpp | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp index c43bda8521fbf..a0a54f01d29f6 100644 --- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp +++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp @@ -657,27 +657,27 @@ DecodeStatus RISCVDisassembler::getInstruction(MCInst &MI, uint64_t &Size, uint64_t Address, raw_ostream &CS) const { // It's a 16 bit instruction if bit 0 and 1 are not 0x3. - if ((Bytes[0] & 0x3) != 0x3) + if ((Bytes[0] & 0b11) != 0b11) return getInstruction16(MI, Size, Bytes, Address, CS); - // It's a 32 bit instruction if bit 1:0 are 0x3(checked above) and bits 4:2 - // are not 0x3. - if ((Bytes[0] & 0x1f) != 0x1f) + // It's a 32 bit instruction if bit 1:0 are 0b11(checked above) and bits 4:2 + // are not 0b111. + if ((Bytes[0] & 0b1'1100) != 0b1'1100) return getInstruction32(MI, Size, Bytes, Address, CS); // 48-bit instructions are encoded as 0bxx011111. - if ((Bytes[0] & 0x3f) == 0x1f) { + if ((Bytes[0] & 0b11'1111) == 0b01'1111) { Size = Bytes.size() >= 6 ? 6 : 0; return MCDisassembler::Fail; } - // 64-bit instructions are encoded as 0bx0111111. - if ((Bytes[0] & 0x7f) == 0x3f) { + // 64-bit instructions are encoded as 0bx0111111. + if ((Bytes[0] & 0b111'1111) == 0b011'1111) { Size = Bytes.size() >= 8 ? 8 : 0; return MCDisassembler::Fail; } - // Need to read a second byte. + // Remaining cases need to check a second byte. 
if (Bytes.size() < 2) { Size = 0; return MCDisassembler::Fail; @@ -685,8 +685,8 @@ DecodeStatus RISCVDisassembler::getInstruction(MCInst &MI, uint64_t &Size, // 80-bit through 176-bit instructions are encoded as 0bxnnnxxxx_x1111111. // Where number of bits is (80 + (nnn * 16)) for nnn != 0b111. - unsigned nnn = (Bytes[1] >> 4) & 0x7; - if (nnn != 0x7) { + unsigned nnn = (Bytes[1] >> 4) & 0b111; + if (nnn != 0b111) { Size = 10 + (nnn * 2); if (Bytes.size() < Size) Size = 0; From 293438de0afd9f1a165fe5b13e4a4d3bddebc57e Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 29 Apr 2024 09:32:42 -0700 Subject: [PATCH 3/4] fixup! Use binary instead of hex in one more spot. --- llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp index a0a54f01d29f6..85a4d38568a5b 100644 --- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp +++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp @@ -656,7 +656,7 @@ DecodeStatus RISCVDisassembler::getInstruction(MCInst &MI, uint64_t &Size, ArrayRef Bytes, uint64_t Address, raw_ostream &CS) const { - // It's a 16 bit instruction if bit 0 and 1 are not 0x3. + // It's a 16 bit instruction if bit 0 and 1 are not 0b11. if ((Bytes[0] & 0b11) != 0b11) return getInstruction16(MI, Size, Bytes, Address, CS); From 806b25b45d6e294884d41d2bd6cd39cd0cf6dbf6 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 29 Apr 2024 09:47:49 -0700 Subject: [PATCH 4/4] fixup! Add missing word to comment. 
--- llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp index 85a4d38568a5b..b9e8e1f33d3ae 100644 --- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp +++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp @@ -684,7 +684,7 @@ DecodeStatus RISCVDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } // 80-bit through 176-bit instructions are encoded as 0bxnnnxxxx_x1111111. - // Where number of bits is (80 + (nnn * 16)) for nnn != 0b111. + // Where the number of bits is (80 + (nnn * 16)) for nnn != 0b111. unsigned nnn = (Bytes[1] >> 4) & 0b111; if (nnn != 0b111) { Size = 10 + (nnn * 2);