From f3344f279b2781a63ef689fc49b0d28f7f8294a1 Mon Sep 17 00:00:00 2001
From: John Naylor
Date: Sun, 18 Jul 2021 17:34:09 -0400
Subject: [PATCH v18 2/6] Unroll loop in DFA

We know exactly how many state transitions will happen for each
valid sequence, and we know we can't possibly overshoot the input
length, so compute all transitions and check the state at the end.
---
 src/common/wchar.c | 41 +++++++++++++++++++++++++++++++++++------
 1 file changed, 35 insertions(+), 6 deletions(-)

diff --git a/src/common/wchar.c b/src/common/wchar.c
index aafc602bcd..0454e332cc 100644
--- a/src/common/wchar.c
+++ b/src/common/wchar.c
@@ -1890,16 +1890,45 @@ utf8_advance(const unsigned char *s)
 	uint64		state = BGN;
 	int			l = 0;
 
-	do
+	class = ByteCategory[*s++];
+	state = class >> (state & DFA_MASK);
+
+	switch(class)
 	{
-		class = ByteCategory[*s++];
-		state = (class >> state) & DFA_MASK;
-		l++;
-	} while (state > ERR);
+		case L2A:
+			l = 2;
+			class = ByteCategory[*s++];
+			state = class >> (state & DFA_MASK);
+			break;
+		case L3A:
+		case L3B:
+		case L3C:
+			l = 3;
+			class = ByteCategory[*s++];
+			state = class >> (state & DFA_MASK);
+			class = ByteCategory[*s++];
+			state = class >> (state & DFA_MASK);
+			break;
+		case L4A:
+		case L4B:
+		case L4C:
+			l = 4;
+			class = ByteCategory[*s++];
+			state = class >> (state & DFA_MASK);
+			class = ByteCategory[*s++];
+			state = class >> (state & DFA_MASK);
+			class = ByteCategory[*s++];
+			state = class >> (state & DFA_MASK);
+			break;
+		default:
+			l = 1;
+			Assert(class == NZA || class == ILL);
+	}
 
-	if (state == ERR)
+	if ((state & DFA_MASK) == ERR)
 		return -1;
 
+	Assert((state & DFA_MASK) == END);
 	Assert(l <= 4);
 	return l;
 }
-- 
2.31.1