Skip to content

Fix parsing of integer literals with base prefix #106

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions esp32_ulp/assemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,13 +219,13 @@ def fill(self, section, amount, fill_byte):
raise ValueError('fill in bss section not allowed')
if section is TEXT: # TODO: text section should be filled with NOPs
raise ValueError('fill/skip/align in text section not supported')
fill = int(fill_byte or 0).to_bytes(1, 'little') * amount
fill = int(self.opcodes.eval_arg(str(fill_byte or 0))).to_bytes(1, 'little') * amount
self.offsets[section] += len(fill)
if section is not BSS:
self.sections[section].append(fill)

def d_skip(self, amount, fill=None):
amount = int(amount)
amount = int(self.opcodes.eval_arg(amount))
self.fill(self.section, amount, fill)

d_space = d_skip
Expand All @@ -246,7 +246,7 @@ def d_global(self, symbol):
self.symbols.set_global(symbol)

def append_data(self, wordlen, args):
data = [int(arg).to_bytes(wordlen, 'little') for arg in args]
data = [int(self.opcodes.eval_arg(arg)).to_bytes(wordlen, 'little') for arg in args]
self.append_section(b''.join(data))

def d_byte(self, *args):
Expand Down
29 changes: 27 additions & 2 deletions esp32_ulp/opcodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,13 +285,38 @@ def eval_arg(arg):
_, _, sym_value = symbols.get_sym(token)
parts.append(str(sym_value))
else:
parts.append(token)
try:
# attempt to parse, to convert numbers with base prefix correctly
int_token = parse_int(token)
parts.append(str(int_token))
except ValueError:
parts.append(token)
parts = "".join(parts)
if not validate_expression(parts):
raise ValueError('Unsupported expression: %s' % parts)
return eval(parts)


def parse_int(literal):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not familiar with this code base, but would it make sense to factor this function out into a separate file, so it can be reused in opcodes_s2.py?

Similarly, could have a single unit test for this function in a separate testing file.

(Just a suggestion 😄 )

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds good, also from a code (de-)duplication aspect.

"""
Parses string literals into integers, using base prefixes
0x (hex), 0b (binary), and 0o or legacy 0NNN (octal).
Without prefix will be treated as decimal.
"""
if len(literal) > 2:
prefix_start = 1 if literal[0] == '-' else 0 # skip negative sign if present

if literal[prefix_start] == "0":
prefix = literal[prefix_start + 1]
if prefix == "x":
return int(literal, 16)
elif prefix == "b":
return int(literal, 2)
return int(literal, 8) # legacy octal (e.g. 077)

return int(literal) # implicit base10


def arg_qualify(arg):
"""
look at arg and qualify its type:
Expand All @@ -311,7 +336,7 @@ def arg_qualify(arg):
if arg_lower in ['--', 'eq', 'ov', 'lt', 'gt', 'ge', 'le']:
return ARG(COND, arg_lower, arg)
try:
return ARG(IMM, int(arg), arg)
return ARG(IMM, parse_int(arg), arg)
except ValueError:
pass
try:
Expand Down
29 changes: 27 additions & 2 deletions esp32_ulp/opcodes_s2.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,13 +301,38 @@ def eval_arg(arg):
_, _, sym_value = symbols.get_sym(token)
parts.append(str(sym_value))
else:
parts.append(token)
try:
# attempt to parse, to convert numbers with base prefix correctly
int_token = parse_int(token)
parts.append(str(int_token))
except ValueError:
parts.append(token)
parts = "".join(parts)
if not validate_expression(parts):
raise ValueError('Unsupported expression: %s' % parts)
return eval(parts)


def parse_int(literal):
"""
Parses string literals into integers, using base prefixes
0x (hex), 0b (binary), and 0o or legacy 0NNN (octal).
Without prefix will be treated as decimal.
"""
if len(literal) > 2:
prefix_start = 1 if literal[0] == '-' else 0 # skip negative sign if present

if literal[prefix_start] == "0":
prefix = literal[prefix_start + 1]
if prefix == "x":
return int(literal, 16)
elif prefix == "b":
return int(literal, 2)
return int(literal, 8) # legacy octal (e.g. 077)

return int(literal) # implicit base10


def arg_qualify(arg):
"""
look at arg and qualify its type:
Expand All @@ -327,7 +352,7 @@ def arg_qualify(arg):
if arg_lower in ['--', 'eq', 'ov', 'lt', 'gt', 'ge', 'le']:
return ARG(COND, arg_lower, arg)
try:
return ARG(IMM, int(arg), arg)
return ARG(IMM, parse_int(arg), arg)
except ValueError:
pass
try:
Expand Down
39 changes: 36 additions & 3 deletions tests/opcodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from uctypes import UINT32, BFUINT32, BF_POS, BF_LEN
from esp32_ulp.opcodes import make_ins, make_ins_struct_def
from esp32_ulp.opcodes import get_reg, get_imm, get_cond, arg_qualify, eval_arg, ARG, REG, IMM, SYM, COND
from esp32_ulp.opcodes import get_reg, get_imm, get_cond, arg_qualify, parse_int, eval_arg, ARG, REG, IMM, SYM, COND
from esp32_ulp.assemble import SymbolTable, ABS, REL, TEXT
import esp32_ulp.opcodes as opcodes

Expand Down Expand Up @@ -39,13 +39,37 @@ def test_make_ins():
assert _delay.all == 0x40000023


def test_parse_int():
# decimal
assert parse_int("5") == 5, "5 == 5"
assert parse_int("-5") == -5, "-5 == -5"
# hex
assert parse_int("0x5") == 5, "0x5 == 5"
assert parse_int("0x5a") == 90, "0x5a == 90"
assert parse_int("-0x5a") == -90, "-0x5a == -90"
# binary
assert parse_int("0b1001") == 9, "0b1001 == 9"
assert parse_int("-0b1001") == -9, "-0b1001 == 9"
# octal
assert parse_int("0100") == 64, "0100 == 64"
assert parse_int("0o210") == 136, "0o210 == 136"
assert parse_int("-0100") == -64, "-0100 == -64"
assert parse_int("-0o210") == -136, "-0o210 == -136"
# negative cases
assert_raises(ValueError, parse_int, '0b123', message="invalid syntax for integer with base 2: '123'")
assert_raises(ValueError, parse_int, '0900', message="invalid syntax for integer with base 8: '0900'")
assert_raises(ValueError, parse_int, '0o900', message="invalid syntax for integer with base 8: '900'")
assert_raises(ValueError, parse_int, '0xg', message="invalid syntax for integer with base 16: 'g'")


def test_arg_qualify():
assert arg_qualify('r0') == ARG(REG, 0, 'r0')
assert arg_qualify('R3') == ARG(REG, 3, 'R3')
assert arg_qualify('0') == ARG(IMM, 0, '0')
assert arg_qualify('-1') == ARG(IMM, -1, '-1')
assert arg_qualify('1') == ARG(IMM, 1, '1')
assert arg_qualify('0x20') == ARG(IMM, 32, '0x20')
assert arg_qualify('0100') == ARG(IMM, 64, '0100')
assert arg_qualify('0o100') == ARG(IMM, 64, '0o100')
assert arg_qualify('0b1000') == ARG(IMM, 8, '0b1000')
assert arg_qualify('eq') == ARG(COND, 'eq', 'eq')
Expand Down Expand Up @@ -96,6 +120,11 @@ def test_eval_arg():
assert eval_arg('const >> 1') == 21
assert eval_arg('(const|4)&0xf') == 0xe

assert eval_arg('0x7') == 7
assert eval_arg('010') == 8
assert eval_arg('-0x7') == -7 # negative
assert eval_arg('~0x7') == -8 # complement

assert_raises(ValueError, eval_arg, 'evil()')
assert_raises(ValueError, eval_arg, 'def cafe()')
assert_raises(ValueError, eval_arg, '1 ^ 2')
Expand All @@ -105,14 +134,17 @@ def test_eval_arg():
opcodes.symbols = None


def assert_raises(exception, func, *args):
def assert_raises(exception, func, *args, message=None):
try:
func(*args)
except exception:
except exception as e:
raised = True
actual_message = e.args[0]
else:
raised = False
assert raised
if message:
assert actual_message == message, '%s == %s' % (actual_message, message)


def test_reg_direct_ulp_addressing():
Expand Down Expand Up @@ -208,6 +240,7 @@ def test_reg_address_translations_sens():

test_make_ins_struct_def()
test_make_ins()
test_parse_int()
test_arg_qualify()
test_get_reg()
test_get_imm()
Expand Down
39 changes: 36 additions & 3 deletions tests/opcodes_s2.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from uctypes import UINT32, BFUINT32, BF_POS, BF_LEN
from esp32_ulp.opcodes_s2 import make_ins, make_ins_struct_def
from esp32_ulp.opcodes_s2 import get_reg, get_imm, get_cond, arg_qualify, eval_arg, ARG, REG, IMM, SYM, COND
from esp32_ulp.opcodes_s2 import get_reg, get_imm, get_cond, arg_qualify, parse_int, eval_arg, ARG, REG, IMM, SYM, COND
from esp32_ulp.assemble import SymbolTable, ABS, REL, TEXT
import esp32_ulp.opcodes_s2 as opcodes

Expand Down Expand Up @@ -39,13 +39,37 @@ def test_make_ins():
assert _delay.all == 0x40000023


def test_parse_int():
# decimal
assert parse_int("5") == 5, "5 == 5"
assert parse_int("-5") == -5, "-5 == -5"
# hex
assert parse_int("0x5") == 5, "0x5 == 5"
assert parse_int("0x5a") == 90, "0x5a == 90"
assert parse_int("-0x5a") == -90, "-0x5a == -90"
# binary
assert parse_int("0b1001") == 9, "0b1001 == 9"
assert parse_int("-0b1001") == -9, "-0b1001 == 9"
# octal
assert parse_int("0100") == 64, "0100 == 64"
assert parse_int("0o210") == 136, "0o210 == 136"
assert parse_int("-0100") == -64, "-0100 == -64"
assert parse_int("-0o210") == -136, "-0o210 == -136"
# negative cases
assert_raises(ValueError, parse_int, '0b123', message="invalid syntax for integer with base 2: '123'")
assert_raises(ValueError, parse_int, '0900', message="invalid syntax for integer with base 8: '0900'")
assert_raises(ValueError, parse_int, '0o900', message="invalid syntax for integer with base 8: '900'")
assert_raises(ValueError, parse_int, '0xg', message="invalid syntax for integer with base 16: 'g'")


def test_arg_qualify():
assert arg_qualify('r0') == ARG(REG, 0, 'r0')
assert arg_qualify('R3') == ARG(REG, 3, 'R3')
assert arg_qualify('0') == ARG(IMM, 0, '0')
assert arg_qualify('-1') == ARG(IMM, -1, '-1')
assert arg_qualify('1') == ARG(IMM, 1, '1')
assert arg_qualify('0x20') == ARG(IMM, 32, '0x20')
assert arg_qualify('0100') == ARG(IMM, 64, '0100')
assert arg_qualify('0o100') == ARG(IMM, 64, '0o100')
assert arg_qualify('0b1000') == ARG(IMM, 8, '0b1000')
assert arg_qualify('eq') == ARG(COND, 'eq', 'eq')
Expand Down Expand Up @@ -96,6 +120,11 @@ def test_eval_arg():
assert eval_arg('const >> 1') == 21
assert eval_arg('(const|4)&0xf') == 0xe

assert eval_arg('0x7') == 7
assert eval_arg('010') == 8
assert eval_arg('-0x7') == -7 # negative
assert eval_arg('~0x7') == -8 # complement

assert_raises(ValueError, eval_arg, 'evil()')
assert_raises(ValueError, eval_arg, 'def cafe()')
assert_raises(ValueError, eval_arg, '1 ^ 2')
Expand All @@ -105,14 +134,17 @@ def test_eval_arg():
opcodes.symbols = None


def assert_raises(exception, func, *args):
def assert_raises(exception, func, *args, message=None):
try:
func(*args)
except exception:
except exception as e:
raised = True
actual_message = e.args[0]
else:
raised = False
assert raised
if message:
assert actual_message == message, '%s == %s' % (actual_message, message)


def test_reg_direct_ulp_addressing():
Expand Down Expand Up @@ -258,6 +290,7 @@ def test_reg_address_translations_s3_sens():

test_make_ins_struct_def()
test_make_ins()
test_parse_int()
test_arg_qualify()
test_get_reg()
test_get_imm()
Expand Down