Skip to content

Commit 04ec02a

Browse files
FFY00 and pablogsal authored
pegen: fold f-string constants and fix string concatenation (python#149)
This patch folds adjacent f-string constants, as was done previously (1), and makes plain string concatenation construct a Constant instead of a JoinedStr (2).

1) `f'hello' f'hello'` will now construct JoinedStr(values=[Constant(value='hellohello')])
2) `'hello' 'hello'` will now construct Constant(value='hellohello')

Co-authored-by: Pablo Galindo Salgado <[email protected]>
1 parent 25184ed commit 04ec02a

File tree

1 file changed

+79
-7
lines changed

1 file changed

+79
-7
lines changed

Parser/pegen.c

Lines changed: 79 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2350,7 +2350,7 @@ _PyPegen_concatenate_strings2(Parser *p, asdl_expr_seq *strings,
23502350
int bytes_found = 0;
23512351

23522352
Py_ssize_t i = 0;
2353-
Py_ssize_t n_elements = 0;
2353+
Py_ssize_t n_flattened_elements = 0;
23542354
for (i = 0; i < len; i++) {
23552355
expr_ty elem = asdl_seq_GET(strings, i);
23562356
if (elem->kind == Constant_kind) {
@@ -2359,9 +2359,9 @@ _PyPegen_concatenate_strings2(Parser *p, asdl_expr_seq *strings,
23592359
} else {
23602360
unicode_string_found = 1;
23612361
}
2362-
n_elements++;
2362+
n_flattened_elements++;
23632363
} else {
2364-
n_elements += asdl_seq_LEN(elem->v.JoinedStr.values);
2364+
n_flattened_elements += asdl_seq_LEN(elem->v.JoinedStr.values);
23652365
f_string_found = 1;
23662366
}
23672367
}
@@ -2383,28 +2383,100 @@ _PyPegen_concatenate_strings2(Parser *p, asdl_expr_seq *strings,
23832383
return _PyAST_Constant(res, NULL, lineno, col_offset, end_lineno, end_col_offset, p->arena);
23842384
}
23852385

2386-
asdl_expr_seq* values = _Py_asdl_expr_seq_new(n_elements, p->arena);
2387-
if (values == NULL) {
2386+
if (!f_string_found && len == 1) {
2387+
return asdl_seq_GET(strings, 0);
2388+
}
2389+
2390+
asdl_expr_seq* flattened = _Py_asdl_expr_seq_new(n_flattened_elements, p->arena);
2391+
if (flattened == NULL) {
23882392
return NULL;
23892393
}
23902394

2395+
/* build flattened list */
23912396
Py_ssize_t current_pos = 0;
23922397
Py_ssize_t j = 0;
23932398
for (i = 0; i < len; i++) {
23942399
expr_ty elem = asdl_seq_GET(strings, i);
23952400
if (elem->kind == Constant_kind) {
2396-
asdl_seq_SET(values, current_pos++, elem);
2401+
asdl_seq_SET(flattened, current_pos++, elem);
23972402
} else {
23982403
for (j=0; j < asdl_seq_LEN(elem->v.JoinedStr.values); j++) {
23992404
expr_ty subvalue = asdl_seq_GET(elem->v.JoinedStr.values, j);
24002405
if (subvalue == NULL) {
24012406
return NULL;
24022407
}
2403-
asdl_seq_SET(values, current_pos++, subvalue);
2408+
asdl_seq_SET(flattened, current_pos++, subvalue);
2409+
}
2410+
}
2411+
}
2412+
2413+
/* calculate folded element count */
2414+
Py_ssize_t n_elements = 0;
2415+
int prev_is_constant = 0;
2416+
for (i = 0; i < n_flattened_elements; i++) {
2417+
expr_ty elem = asdl_seq_GET(flattened, i);
2418+
if (!prev_is_constant || elem->kind != Constant_kind) {
2419+
n_elements++;
2420+
}
2421+
prev_is_constant = elem->kind == Constant_kind;
2422+
}
2423+
2424+
asdl_expr_seq* values = _Py_asdl_expr_seq_new(n_elements, p->arena);
2425+
if (values == NULL) {
2426+
return NULL;
2427+
}
2428+
2429+
/* build folded list */
2430+
_PyUnicodeWriter writer;
2431+
current_pos = 0;
2432+
for (i = 0; i < n_flattened_elements; i++) {
2433+
expr_ty elem = asdl_seq_GET(flattened, i);
2434+
2435+
/* if the current elem and the following are constants,
2436+
fold them and all subsequent constants */
2437+
if (elem->kind == Constant_kind && i+1 < n_flattened_elements
2438+
&& asdl_seq_GET(flattened, i+1)->kind == Constant_kind) {
2439+
2440+
_PyUnicodeWriter_Init(&writer);
2441+
expr_ty last_elem = elem;
2442+
for (j = i; j < n_flattened_elements; j++) {
2443+
elem = asdl_seq_GET(flattened, j);
2444+
if (elem->kind == Constant_kind) {
2445+
if (_PyUnicodeWriter_WriteStr(&writer, elem->v.Constant.value)) {
2446+
_PyUnicodeWriter_Dealloc(&writer);
2447+
return NULL;
2448+
}
2449+
last_elem = elem;
2450+
} else {
2451+
break;
2452+
}
2453+
}
2454+
i = j-1;
2455+
2456+
PyObject *concat_str = _PyUnicodeWriter_Finish(&writer);
2457+
if (concat_str == NULL) {
2458+
_PyUnicodeWriter_Dealloc(&writer);
2459+
return NULL;
2460+
}
2461+
2462+
elem = _PyAST_Constant(concat_str, NULL, elem->lineno, elem->col_offset,
2463+
last_elem->end_lineno, last_elem->end_col_offset, p->arena);
2464+
if (elem == NULL) {
2465+
Py_DECREF(concat_str);
2466+
return NULL;
24042467
}
24052468
}
2469+
2470+
asdl_seq_SET(values, current_pos++, elem);
24062471
}
24072472

2473+
if (!f_string_found) {
2474+
assert(n_elements == 1);
2475+
expr_ty elem = asdl_seq_GET(values, 0);
2476+
assert(elem->kind == Constant_kind);
2477+
return elem;
2478+
}
2479+
24082480
return _PyAST_JoinedStr(values, lineno, col_offset, end_lineno, end_col_offset, p->arena);
24092481
}
24102482

0 commit comments

Comments
 (0)