From 9aa12200e46ec67e7679869004a20fe8e871ab83 Mon Sep 17 00:00:00 2001 From: Zoltan Herczeg Date: Mon, 23 Dec 2024 06:44:52 +0000 Subject: [PATCH] Pre-compute unicode category list for xclasses --- src/pcre2_compile_class.c | 118 +++++++++++++++++++++++++++++++-- src/pcre2_internal.h | 19 +++++- src/pcre2_intmodedep.h | 3 + src/pcre2_jit_char_inc.h | 30 ++++----- src/pcre2_jit_compile.c | 10 --- src/pcre2_printint_inc.h | 134 +++++++++++++++++++++++++++++++++++++- src/pcre2_tables.c | 17 +++-- src/pcre2_xclass.c | 54 ++++++--------- 8 files changed, 305 insertions(+), 80 deletions(-) diff --git a/src/pcre2_compile_class.c b/src/pcre2_compile_class.c index 47d151387..a28563a57 100644 --- a/src/pcre2_compile_class.c +++ b/src/pcre2_compile_class.c @@ -115,10 +115,11 @@ while (TRUE) #ifdef SUPPORT_UNICODE -#define PARSE_CLASS_UTF 0x1 -#define PARSE_CLASS_CASELESS_UTF 0x2 -#define PARSE_CLASS_RESTRICTED_UTF 0x4 -#define PARSE_CLASS_TURKISH_UTF 0x8 +#define PARSE_CLASS_UTF 0x01 +#define PARSE_CLASS_CASELESS_UTF 0x02 +#define PARSE_CLASS_RESTRICTED_UTF 0x04 +#define PARSE_CLASS_TURKISH_UTF 0x08 +#define PARSE_CLASS_COMPUTE_CATLIST 0x10 /* Get the range of nocase characters which includes the 'c' character passed as argument, or directly follows 'c'. */ @@ -357,6 +358,7 @@ append_non_ascii_range(uint32_t options, uint32_t *buffer) return buffer + 2; } +/* The buffer may represent the category list pointer when Unicode support is compiled in and PARSE_CLASS_COMPUTE_CATLIST is set. 
*/ static size_t parse_class(uint32_t *ptr, uint32_t options, uint32_t *buffer) { @@ -364,6 +366,20 @@ size_t total_size = 0; size_t size; uint32_t meta_arg; uint32_t start_char; +uint32_t ptype; +#ifdef SUPPORT_UNICODE +uint32_t pdata; +uint32_t category_list; +uint32_t *pcategory_list = NULL; +#endif + +#ifdef SUPPORT_UNICODE +if ((options & PARSE_CLASS_COMPUTE_CATLIST) != 0) + { + pcategory_list = buffer; + buffer = NULL; + } +#endif while (TRUE) { @@ -407,7 +423,8 @@ while (TRUE) case ESC_p: case ESC_P: ptr++; - if (meta_arg == ESC_p && (*ptr >> 16) == PT_ANY) + ptype = (*ptr >> 16); + if (meta_arg == ESC_p && ptype == PT_ANY) { if (buffer != NULL) { @@ -417,6 +434,43 @@ while (TRUE) } total_size += 2; } +#ifdef SUPPORT_UNICODE + if (pcategory_list == NULL) break; + + category_list = 0; + + switch(ptype) + { + case PT_LAMP: + category_list = UCPCAT3(ucp_Lu, ucp_Ll, ucp_Lt); + break; + + case PT_GC: + pdata = *ptr & 0xffff; + category_list = UCPCAT_RANGE(PRIV(ucp_typerange)[pdata], + PRIV(ucp_typerange)[pdata + 1] - 1); + break; + + case PT_PC: + pdata = *ptr & 0xffff; + category_list = UCPCAT(pdata); + break; + + case PT_WORD: + category_list = UCPCAT2(ucp_Mn, ucp_Pc) | UCPCAT_L | UCPCAT_N; + break; + + case PT_ALNUM: + category_list = UCPCAT_L | UCPCAT_N; + break; + } + + if (category_list > 0) + { + if (meta_arg == ESC_P) category_list ^= UCPCAT_ALL; + *pcategory_list |= category_list; + } +#endif break; } ptr++; @@ -511,6 +565,9 @@ const uint32_t *char_list_next; uint16_t *next_char; uint32_t char_list_start, char_list_end; uint32_t range_start, range_end; +#ifdef SUPPORT_UNICODE +uint32_t category_list = 0; +#endif #ifdef SUPPORT_UNICODE if (options & PCRE2_UTF) @@ -531,11 +588,22 @@ if (xoptions & PCRE2_EXTRA_TURKISH_CASING) /* Compute required space for the range. 
*/ +#ifdef SUPPORT_UNICODE +range_list_size = parse_class(start_ptr, + class_options | PARSE_CLASS_COMPUTE_CATLIST, + &category_list); +#else range_list_size = parse_class(start_ptr, class_options, NULL); +#endif PCRE2_ASSERT((range_list_size & 0x1) == 0); /* Allocate buffer. The total_size also represents the end of the buffer. */ +#ifdef SUPPORT_UNICODE +/* Replaced by an OP_ALLANY. */ +if (category_list == UCPCAT_ALL) range_list_size = 2; +#endif + total_size = range_list_size + ((range_list_size >= 2) ? CHAR_LIST_EXTRA_SIZE : 0); @@ -553,6 +621,21 @@ cranges->range_list_size = (uint16_t)range_list_size; cranges->char_lists_types = 0; cranges->char_lists_size = 0; cranges->char_lists_start = 0; +#ifdef SUPPORT_UNICODE +cranges->category_list = category_list; +#endif + +#ifdef SUPPORT_UNICODE +if (category_list == UCPCAT_ALL) + { + /* Replace the xclass with OP_ALLANY. */ + cranges->category_list = 0; + buffer = (uint32_t*)(cranges + 1); + buffer[0] = 0; + buffer[1] = get_highest_char(class_options); + return cranges; + } +#endif if (range_list_size == 0) return cranges; @@ -1087,6 +1170,7 @@ BOOL utf = FALSE; #ifdef SUPPORT_WIDE_CHARS uint32_t xclass_props; +uint32_t category_list; PCRE2_UCHAR *class_uchardata; class_ranges* cranges; #else @@ -1107,6 +1191,7 @@ should_flip_negation = FALSE; #ifdef SUPPORT_WIDE_CHARS xclass_props = 0; +category_list = 0; #if PCRE2_CODE_UNIT_WIDTH == 8 cranges = NULL; @@ -1140,6 +1225,9 @@ if (utf) cb->first_data = cranges->header.next; } + category_list = cranges->category_list; + PCRE2_ASSERT(category_list != UCPCAT_ALL); + if (cranges->range_list_size > 0) { const uint32_t *ranges = (const uint32_t*)(cranges + 1); @@ -1154,6 +1242,13 @@ if (utf) } class_uchardata = code + LINK_SIZE + 2; /* For XCLASS items */ + +if (cranges != NULL && category_list != 0 && + (xclass_props & XCLASS_HIGH_ANY) == 0) + { + xclass_props |= XCLASS_REQUIRED | XCLASS_HAS_PROPS; + class_uchardata += sizeof(uint32_t) / sizeof(PCRE2_UCHAR); + } #endif /* 
SUPPORT_WIDE_CHARS */ /* Initialize the 256-bit (32-byte) bit map to all zeros. We build the map @@ -1444,7 +1539,9 @@ while (TRUE) PRIV(update_classbits)(ptype, pdata, (escape == ESC_P), classbits); - if ((xclass_props & XCLASS_HIGH_ANY) == 0) + if ((xclass_props & XCLASS_HIGH_ANY) == 0 && + ptype != PT_LAMP && ptype != PT_GC && ptype != PT_PC && + ptype != PT_WORD && ptype != PT_ALNUM) { if (lengthptr != NULL) *lengthptr += 3; @@ -1709,6 +1806,15 @@ if ((xclass_props & XCLASS_REQUIRED) != 0) *code = negate_class? XCL_NOT:0; if ((xclass_props & XCLASS_HAS_PROPS) != 0) *code |= XCL_HASPROP; + /* The category_list is placed after the class feature bitset. + The code pointer is not increased, because the bitset for the + first 256 characters may be injected after the feature bitset. */ + if (category_list != 0) + { + *code |= XCL_HASCATLIST; + memmove(code + 1, &category_list, sizeof(uint32_t)); + } + /* If the map is required, move up the extra data to make room for it; otherwise just move the code pointer to the end of the extra data. */ diff --git a/src/pcre2_internal.h b/src/pcre2_internal.h index d26edb4e7..88895ea01 100644 --- a/src/pcre2_internal.h +++ b/src/pcre2_internal.h @@ -1515,9 +1515,10 @@ table. */ /* Flag bits and data types for the extended class (OP_XCLASS) for classes that contain characters with values greater than 255. */ -#define XCL_NOT 0x01 /* Flag: this is a negative class */ -#define XCL_MAP 0x02 /* Flag: a 32-byte map is present */ -#define XCL_HASPROP 0x04 /* Flag: property checks are present. */ +#define XCL_NOT 0x01 /* Flag: this is a negative class */ +#define XCL_MAP 0x02 /* Flag: a 32-byte map is present */ +#define XCL_HASPROP 0x04 /* Flag: property checks are present */ +#define XCL_HASCATLIST 0x08 /* Flag: category list is present */ #define XCL_END 0 /* Marks end of individual items */ #define XCL_SINGLE 1 /* Single item (one multibyte char) follows */ @@ -2189,6 +2190,18 @@ typedef struct { ((uint32_t)(ch) == 0x0130u ? 
0x69u : \ (uint32_t)(ch) == 0x49u ? 0x0131u : (uint32_t)(ch)) +/* UCP bitset manipulating macros. */ + +#ifdef SUPPORT_UNICODE +#define UCPCAT(bit) (1 << (bit)) +#define UCPCAT2(bit1, bit2) (UCPCAT(bit1) | UCPCAT(bit2)) +#define UCPCAT3(bit1, bit2, bit3) (UCPCAT(bit1) | UCPCAT(bit2) | UCPCAT(bit3)) +#define UCPCAT_RANGE(start, end) (((1 << ((end) + 1)) - 1) - ((1 << (start)) - 1)) +#define UCPCAT_L UCPCAT_RANGE(ucp_Ll, ucp_Lu) +#define UCPCAT_N UCPCAT_RANGE(ucp_Nd, ucp_No) +#define UCPCAT_ALL ((1 << (ucp_Zs + 1)) - 1) +#endif + /* The "scriptx" and bprops fields contain offsets into vectors of 32-bit words that form a bitmap representing a list of scripts or boolean properties. These macros test or set a bit in the map by number. */ diff --git a/src/pcre2_intmodedep.h b/src/pcre2_intmodedep.h index 02763c4f0..e45372a1f 100644 --- a/src/pcre2_intmodedep.h +++ b/src/pcre2_intmodedep.h @@ -754,6 +754,9 @@ typedef struct class_ranges { compile_data header; /* Common header */ size_t char_lists_size; /* Total size of encoded char lists */ size_t char_lists_start; /* Start offset of encoded char lists */ +#ifdef SUPPORT_UNICODE + uint32_t category_list; /* Bitset of matching unicode categories. 
*/ +#endif uint16_t range_list_size; /* Size of ranges array */ uint16_t char_lists_types; /* The XCL_LIST header of char lists */ /* Followed by the list of ranges (start/end pairs) */ diff --git a/src/pcre2_jit_char_inc.h b/src/pcre2_jit_char_inc.h index 475895405..944faabf4 100644 --- a/src/pcre2_jit_char_inc.h +++ b/src/pcre2_jit_char_inc.h @@ -533,6 +533,13 @@ if (flags & XCL_MAP) cc += 32 / sizeof(PCRE2_UCHAR); #ifdef SUPPORT_UNICODE +if (flags & XCL_HASCATLIST) + { + memcpy(&category_list, cc, sizeof(uint32_t)); + status |= XCLASS_HAS_TYPE; + cc += sizeof(uint32_t) / sizeof(PCRE2_UCHAR); + } + while (*cc == XCL_PROP || *cc == XCL_NOTPROP) { compares++; @@ -542,12 +549,14 @@ while (*cc == XCL_PROP || *cc == XCL_NOTPROP) switch(*cc) { + /* JIT compiles bare (not in class) escape sequences using + this code path, so setting categories must be kept. */ case PT_LAMP: items = UCPCAT3(ucp_Lu, ucp_Ll, ucp_Lt); break; case PT_GC: - items = UCPCAT_RANGE(PRIV(ucp_typerange)[(int)cc[1] * 2], PRIV(ucp_typerange)[(int)cc[1] * 2 + 1]); + items = UCPCAT_RANGE(PRIV(ucp_typerange)[(int)cc[1]], PRIV(ucp_typerange)[(int)cc[1] + 1] - 1); break; case PT_PC: @@ -614,21 +623,7 @@ while (*cc == XCL_PROP || *cc == XCL_NOTPROP) cc += 2; } -if (category_list == UCPCAT_ALL) - { - /* All or no characters are accepted, same as dotall. 
*/ - if (status & XCLASS_IS_ECLASS) - { - if (list != backtracks) - OP2(SLJIT_OR, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, 1); - return; - } - - compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE); - if (list == backtracks) - add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); - return; - } +SLJIT_ASSERT(category_list != UCPCAT_ALL); if (category_list != 0) compares++; @@ -681,6 +676,9 @@ if ((flags & XCL_MAP) != 0) } #ifdef SUPPORT_UNICODE +if (flags & XCL_HASCATLIST) + cc += sizeof(uint32_t) / sizeof(PCRE2_UCHAR); + if (status & XCLASS_NEEDS_UCD) { if ((status & (XCLASS_SAVE_CHAR | XCLASS_IS_ECLASS)) == XCLASS_SAVE_CHAR) diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c index 3bfd8e06a..a4ed9c172 100644 --- a/src/pcre2_jit_compile.c +++ b/src/pcre2_jit_compile.c @@ -7147,16 +7147,6 @@ else JUMPTO(SLJIT_JUMP, mainloop); } -#ifdef SUPPORT_UNICODE -#define UCPCAT(bit) (1 << (bit)) -#define UCPCAT2(bit1, bit2) (UCPCAT(bit1) | UCPCAT(bit2)) -#define UCPCAT3(bit1, bit2, bit3) (UCPCAT(bit1) | UCPCAT(bit2) | UCPCAT(bit3)) -#define UCPCAT_RANGE(start, end) (((1 << ((end) + 1)) - 1) - ((1 << (start)) - 1)) -#define UCPCAT_L UCPCAT_RANGE(ucp_Ll, ucp_Lu) -#define UCPCAT_N UCPCAT_RANGE(ucp_Nd, ucp_No) -#define UCPCAT_ALL ((1 << (ucp_Zs + 1)) - 1) -#endif - static void check_wordboundary(compiler_common *common, BOOL ucp) { DEFINE_COMPILER; diff --git a/src/pcre2_printint_inc.h b/src/pcre2_printint_inc.h index 49319a2a5..ca15aa4b3 100644 --- a/src/pcre2_printint_inc.h +++ b/src/pcre2_printint_inc.h @@ -69,6 +69,7 @@ STATIC_ASSERT(sizeof(OP_names)/sizeof(*OP_names) == OP_TABLE_LENGTH, OP_names); #define print_prop PCRE2_SUFFIX(print_prop_) #define print_char_list PCRE2_SUFFIX(print_char_list_) #define print_map PCRE2_SUFFIX(print_map_) +#define print_catlist PCRE2_SUFFIX(print_catlist_) #define print_class PCRE2_SUFFIX(print_class_) /* Table of sizes for the fixed-length opcodes. 
It's defined in a macro so that @@ -514,6 +515,129 @@ for (input = 0; input < 256; input++) +/************************************************* +* Print a unicode category list * +*************************************************/ + +/* Prints the 32-bit unicode category bitset, which may occur within an XCLASS opcode. + +Arguments: + f file to write to + list unicode category list + +Returns: nothing +*/ + +static void +print_catlist(FILE *f, const uint8_t *list) +{ +uint32_t category_list; +uint32_t bitcount, mask, pc_start; +int i, j; +const char *name; + +memcpy(&category_list, list, sizeof(uint32_t)); + +mask = ~category_list & UCPCAT_ALL; + +/* Groups with special name. */ +if (category_list == UCPCAT3(ucp_Lu, ucp_Ll, ucp_Lt)) + { + fprintf(f, "\\p{Lc}"); + return; + } + +if (mask == UCPCAT3(ucp_Lu, ucp_Ll, ucp_Lt)) + { + fprintf(f, "\\P{Lc}"); + return; + } + +if (category_list == (UCPCAT2(ucp_Mn, ucp_Pc) | UCPCAT_L | UCPCAT_N)) + { + fprintf(f, "\\p{Xwd}"); + return; + } + +if (mask == (UCPCAT2(ucp_Mn, ucp_Pc) | UCPCAT_L | UCPCAT_N)) + { + fprintf(f, "\\P{Xwd}"); + return; + } + +if (category_list == (UCPCAT_L | UCPCAT_N)) + { + fprintf(f, "\\p{Xan}"); + return; + } + +if (mask == (UCPCAT_L | UCPCAT_N)) + { + fprintf(f, "\\P{Xan}"); + return; + } + +/* Scan negated categories first. */ +if ((mask & (mask - 1)) == 0) + { + /* Only one negated category is set. */ + j = 0; + while ((mask & 0x1) == 0) + { + j++; + mask >>= 1; + } + + name = get_ucpname(PT_PC, j); + fprintf(f, "\\P{%c%s}", toupper(name[0]), name + 1); + return; + } + +for (i = 0; i < 7; i++) + { + mask = ((1 << (PRIV(ucp_typerange)[i + 1])) - 1) - + ((1 << PRIV(ucp_typerange)[i]) - 1); + + if ((category_list | mask) == UCPCAT_ALL) + { + /* Negated general category. */ + name = get_ucpname(PT_GC, i); + fprintf(f, "\\P{%c}", toupper(name[0])); + category_list &= mask; + break; + } + } + +pc_start = 0; +for (i = 0; i < 7; i++) + { + /* Scan general categories. 
*/ + bitcount = PRIV(ucp_typerange)[i + 1] - PRIV(ucp_typerange)[i]; + mask = (1 << bitcount) - 1; + + if ((category_list & mask) == mask) + { + name = get_ucpname(PT_GC, i); + fprintf(f, "\\p{%c}", toupper(name[0])); + } + else if ((category_list & mask) != 0) + { + /* Scan particular categories. */ + for (j = 0; j < bitcount; j++) + if ((category_list & (1 << j)) != 0) + { + name = get_ucpname(PT_PC, pc_start + j); + fprintf(f, "\\p{%c%s}", toupper(name[0]), name + 1); + } + } + + category_list >>= bitcount; + pc_start += bitcount; + } +} + + + /************************************************* * Print character class * *************************************************/ @@ -536,7 +660,7 @@ static void print_class(FILE *f, int type, PCRE2_SPTR code, const uint8_t *char_lists_end, BOOL utf, const char *before, const char *after) { -BOOL printmap, negated; +BOOL printmap, printcatlist, negated; PCRE2_SPTR ccode; /* Negative XCLASS and NCLASS both have a bitmap indicating which characters @@ -545,12 +669,14 @@ if (type == OP_XCLASS) { ccode = code + LINK_SIZE; printmap = (*ccode & XCL_MAP) != 0; + printcatlist = (*ccode & XCL_HASCATLIST) != 0; negated = (*ccode & XCL_NOT) != 0; ccode++; } else /* CLASS or NCLASS */ { printmap = TRUE; + printcatlist = FALSE; negated = type == OP_NCLASS; ccode = code; } @@ -564,6 +690,12 @@ if (printmap) ccode += 32 / sizeof(PCRE2_UCHAR); } +if (printcatlist) + { + print_catlist(f, (const uint8_t *)ccode); + ccode += sizeof(uint32_t) / sizeof(PCRE2_UCHAR); + } + /* For an XCLASS there is always some additional data */ if (type == OP_XCLASS) { diff --git a/src/pcre2_tables.c b/src/pcre2_tables.c index 6f8b8b553..86d6e7bbf 100644 --- a/src/pcre2_tables.c +++ b/src/pcre2_tables.c @@ -211,20 +211,19 @@ const uint32_t PRIV(ucp_gbtable)[] = { #undef ESZ -#ifdef SUPPORT_JIT /* This table reverses PRIV(ucp_gentype). We can save the cost of a memory load. 
*/ const int PRIV(ucp_typerange)[] = { - ucp_Cc, ucp_Cs, - ucp_Ll, ucp_Lu, - ucp_Mc, ucp_Mn, - ucp_Nd, ucp_No, - ucp_Pc, ucp_Ps, - ucp_Sc, ucp_So, - ucp_Zl, ucp_Zs, + ucp_Cc, + ucp_Ll, + ucp_Mc, + ucp_Nd, + ucp_Pc, + ucp_Sc, + ucp_Zl, + ucp_Zs + 1, /* Terminator, not part of the list. */ }; -#endif /* SUPPORT_JIT */ /* Finally, include the tables that are auto-generated from the Unicode data files. */ diff --git a/src/pcre2_xclass.c b/src/pcre2_xclass.c index 3273edd46..6783f1549 100644 --- a/src/pcre2_xclass.c +++ b/src/pcre2_xclass.c @@ -68,7 +68,8 @@ PRIV(xclass)(uint32_t c, PCRE2_SPTR data, const uint8_t *char_lists_end, BOOL ut { /* Update PRIV(update_classbits) when this function is changed. */ PCRE2_UCHAR t; -BOOL not_negated = (*data & XCL_NOT) == 0; +PCRE2_UCHAR flags = *data++; +BOOL not_negated = (flags & XCL_NOT) == 0; uint32_t type, max_index, min_index, value; const uint8_t *next_char; @@ -79,7 +80,7 @@ utf = TRUE; /* Code points < 256 are matched against a bitmap, if one is present. */ -if ((*data++ & XCL_MAP) != 0) +if ((flags & XCL_MAP) != 0) { if (c < 256) return (((const uint8_t *)data)[c/8] & (1u << (c&7))) != 0; @@ -90,12 +91,26 @@ if ((*data++ & XCL_MAP) != 0) /* Match against the list of Unicode properties. We won't ever encounter XCL_PROP or XCL_NOTPROP when UTF support is not compiled. */ #ifdef SUPPORT_UNICODE -if (*data == XCL_PROP || *data == XCL_NOTPROP) +if ((flags & XCL_HASPROP) != 0) { /* The UCD record is the same for all properties. */ const ucd_record *prop = GET_UCD(c); - do + PCRE2_ASSERT(*data == XCL_PROP || *data == XCL_NOTPROP || + (flags & XCL_HASCATLIST) != 0); + + if ((flags & XCL_HASCATLIST) != 0) + { + uint32_t category_list; + memcpy(&category_list, data, sizeof(uint32_t)); + + if (category_list & (1 << prop->chartype)) return not_negated; + + /* Skip bitmap. 
*/ + data += sizeof(uint32_t) / sizeof(PCRE2_UCHAR); + } + + while (*data == XCL_PROP || *data == XCL_NOTPROP) { int chartype; BOOL isprop = (*data++) == XCL_PROP; @@ -103,21 +118,6 @@ if (*data == XCL_PROP || *data == XCL_NOTPROP) switch(*data) { - case PT_LAMP: - chartype = prop->chartype; - if ((chartype == ucp_Lu || chartype == ucp_Ll || - chartype == ucp_Lt) == isprop) return not_negated; - break; - - case PT_GC: - if ((data[1] == PRIV(ucp_gentype)[prop->chartype]) == isprop) - return not_negated; - break; - - case PT_PC: - if ((data[1] == prop->chartype) == isprop) return not_negated; - break; - case PT_SC: if ((data[1] == prop->script) == isprop) return not_negated; break; @@ -128,13 +128,6 @@ if (*data == XCL_PROP || *data == XCL_NOTPROP) if (ok == isprop) return not_negated; break; - case PT_ALNUM: - chartype = prop->chartype; - if ((PRIV(ucp_gentype)[chartype] == ucp_L || - PRIV(ucp_gentype)[chartype] == ucp_N) == isprop) - return not_negated; - break; - /* Perl space used to exclude VT, but from Perl 5.18 it is included, which means that Perl space and POSIX space are now identical. PCRE was changed at release 8.34. */ @@ -155,14 +148,6 @@ if (*data == XCL_PROP || *data == XCL_NOTPROP) } break; - case PT_WORD: - chartype = prop->chartype; - if ((PRIV(ucp_gentype)[chartype] == ucp_L || - PRIV(ucp_gentype)[chartype] == ucp_N || - chartype == ucp_Mn || chartype == ucp_Pc) == isprop) - return not_negated; - break; - case PT_UCNC: if (c < 0xa0) { @@ -257,7 +242,6 @@ if (*data == XCL_PROP || *data == XCL_NOTPROP) data += 2; } - while (*data == XCL_PROP || *data == XCL_NOTPROP); } #else (void)utf; /* Avoid compiler warning */