// NOTE(review): this listing is non-contiguous. The leading numbers (66, 69,
// 70, 77, ...) look like line numbers from the original file fused onto the
// code, and the lines between them (braces, state labels, the declaration of
// len, the return statement) are not visible here.
//
// u8next_: examines the UTF-8 sequence that begins at `start`, bounded by
// `end`. The visible lines classify the lead byte, range-check every
// following byte, and fold 6 payload bits per following byte into `val`
// (`val` is passed by reference, so this is C++ rather than C).
// Presumably returns `len`, the number of bytes to advance; the return
// itself is not visible in this excerpt -- confirm against the full file.
66size_t u8next_(
const char *start,
const char* end,
char32_t &val)
// Unsigned byte cursors: s walks the input, e is the position at which the
// input counts as exhausted (every later check treats s == e as invalid).
69 const unsigned char *s = (
const unsigned char *)start;
70 const unsigned char *e = (
const unsigned char *)end;
// Lead-byte dispatch. Each branch records the sequence length in len and
// jumps to a named state through fsmGOTO (a macro defined elsewhere).
// NOTE(review): *s is read here; whether start == end is rejected beforehand
// is not visible in this excerpt.
// NOTE(review): the `val &= mask` statements keep only the low payload bits,
// so val presumably already holds the lead byte at this point -- confirm.
// 0x00..0x7F: single-byte sequence.
77 if (*s < 0x80) { len = 1;
fsmGOTO(end); }
// 0x80..0xC1: rejected as a lead byte (in UTF-8 these are continuation bytes
// and the overlong two-byte leads 0xC0/0xC1).
79 if (*s <= 0xC1) {
fsmGOTO(invalid); }
// 0xC2..0xDF: two-byte sequence, 5 payload bits in the lead byte.
80 if (*s <= 0xDF) { val &= 0x1F; len = 2;
fsmGOTO(len2_0); }
// 0xE0: three-byte sequence routed to its own state (len3_0).
81 if (*s == 0xE0) { val &= 0x0F; len = 3;
fsmGOTO(len3_0); }
// 0xE1..0xEC: three-byte sequence, shared state len3_1.
82 if (*s <= 0xEC) { val &= 0x0F; len = 3;
fsmGOTO(len3_1); }
// 0xED: three-byte sequence routed to its own state (len3_2).
83 if (*s == 0xED) { val &= 0x0F; len = 3;
fsmGOTO(len3_2); }
// 0xEE..0xEF: three-byte sequence, same state as 0xE1..0xEC.
84 if (*s <= 0xEF) { val &= 0x0F; len = 3;
fsmGOTO(len3_1); }
// 0xF0: four-byte sequence routed to its own state (len4_0).
85 if (*s == 0xF0) { val &= 0x07; len = 4;
fsmGOTO(len4_0); }
// 0xF1..0xF3: four-byte sequence, shared state len4_1.
86 if (*s <= 0xF3) { val &= 0x07; len = 4;
fsmGOTO(len4_1); }
// 0xF4: four-byte sequence routed to its own state (len4_2).
87 if (*s == 0xF4) { val &= 0x07; len = 4;
fsmGOTO(len4_2); }
// The state labels for the checks below are not visible; the state named in
// each comment is inferred from the accepted byte range -- TODO confirm.
// Next byte must be 0x90..0xBF (presumably state len4_0, after lead 0xF0).
98 s++;
if (s == e || *s < 0x90 || 0xBF < *s)
fsmGOTO(invalid);
// Next byte must be 0x80..0xBF (presumably state len4_1).
103 s++;
if (s == e || *s < 0x80 || 0xBF < *s)
fsmGOTO(invalid);
// Next byte must be 0x80..0x8F (presumably state len4_2, after lead 0xF4).
108 s++;
if (s == e || *s < 0x80 || 0x8F < *s)
fsmGOTO(invalid);
// Append the low 6 bits of the accepted byte to val.
113 val = (val << 6) | (*s & 0x3F);
// Next byte must be 0xA0..0xBF (presumably state len3_0, after lead 0xE0).
118 s++;
if (s == e || *s < 0xA0 || 0xBF < *s)
fsmGOTO(invalid);
// Next byte must be 0x80..0xBF (presumably state len3_1).
123 s++;
if (s == e || *s < 0x80 || 0xBF < *s)
fsmGOTO(invalid);
// Next byte must be 0x80..0x9F (presumably state len3_2, after lead 0xED;
// in UTF-8 this range excludes the surrogate code points).
128 s++;
if (s == e || *s < 0x80 || 0x9F < *s)
fsmGOTO(invalid);
// Append the low 6 bits of the accepted byte to val.
133 val = (val << 6) | (*s & 0x3F);
// Next byte must be 0x80..0xBF, then its low 6 bits are appended to val
// (presumably the final-byte state that len2_0 and the longer forms share).
138 s++;
if (s == e || *s < 0x80 || 0xBF < *s)
fsmGOTO(invalid);
139 val = (val << 6) | (*s & 0x3F);
// adv is how far s has moved from start; len becomes adv but never less
// than 1. Presumably this is the `invalid` state, so that a caller always
// advances by at least one byte -- the label is not visible; confirm.
145 size_t adv = s - (
const unsigned char*)start;
146 len = (adv <= 1 ? 1 : adv);
// NOTE(review): these lines redeclare s and read from `txt`, which is not a
// parameter of u8next_, so they appear to belong to a different routine
// whose header is not visible in this excerpt. The lines between the
// numbered ones (including the L2 label) are missing as well.
// View txt as unsigned bytes and remember the lead byte.
159 unsigned char *s = (
unsigned char *)txt;
160 unsigned char first = *s;
// Dispatch on the lead-byte bit pattern: 11110xxx jumps to L4 with its low
// 3 bits, 1110xxxx jumps to L3 with its low 4 bits, and every other value
// jumps to L2 with its low 5 bits. In each case the kept bits are ORed into
// val shifted left by 6.
// NOTE(review): the final else does not test for 110xxxxx; whether other
// byte values are filtered out earlier is not visible -- confirm.
167 if ((first & 0xF8) == 0xF0) { val |= (first & 0x07)<<6;
goto L4;}
168 else if ((first & 0xF0) == 0xE0) { val |= (first & 0x0F)<<6;
goto L3;}
169 else { val |= (first & 0x1F)<<6;
goto L2;}
// L4 and L3 each shift val left by 6, advance s only when the current byte
// is non-zero (so s does not step past a NUL), and then OR the low 6 bits of
// *s into val. The OR is not guarded by the `if`; it always executes.
// L4 runs straight on into L3.
// NOTE(review): no visible check that *s is a 10xxxxxx continuation byte.
// NOTE(review): the lead bits were already shifted by 6 before the jump and
// each step here ORs new bits into bits 0-5 after a further shift; whether
// the payload bits end up aligned depends on the L2 code that is not
// visible -- verify 3- and 4-byte decoding against the full file.
171 L4: val <<= 6;
if (*s) s++; val |= (*s & 0x3F);
172 L3: val <<= 6;
if (*s) s++; val |= (*s & 0x3F);
// len is one more than the distance s has moved from txt.
175 len = 1+(s-(
unsigned char *)txt);