diff --git a/src/util-utf8.scm b/src/util-utf8.scm index 14a4c6b..0deef64 100644 --- a/src/util-utf8.scm +++ b/src/util-utf8.scm @@ -120,13 +120,13 @@ of the string and a list of remaining bytes (as integers).") (define/doc (utf8-bytes->lists chars) ("The same as above but accepts a list of bytes (as integers).") (let loop ((bytes chars) - (rpending '()) + (rpending chars) (pending 0) (expected #f) (res '())) (if (null? bytes) (values (reverse res) - (reverse rpending)) + rpending) (let ((byte (car bytes))) (cond (expected ;; Decode UTF-8 sequence @@ -135,14 +135,14 @@ of the string and a list of remaining bytes (as integers).") (let ((char (integer->char (bitwise-ior pending (bitwise-and byte #b111111))))) (loop (cdr bytes) - '() + (cdr bytes) 0 #f (cons char res)))) (else ;; Intermediate bytes (loop (cdr bytes) - (cons byte rpending) + rpending (arithmetic-shift (bitwise-ior pending (bitwise-and byte #b111111)) 6) (sub1 expected) @@ -152,7 +152,7 @@ of the string and a list of remaining bytes (as integers).") (cond ((= (bitwise-and byte #b10000000) 0) ;; ASCII (loop (cdr bytes) - '() + (cdr bytes) 0 #f (cons (integer->char byte) res))) @@ -160,20 +160,20 @@ of the string and a list of remaining bytes (as integers).") ;; First byte of UTF-8 sequence (let-values (((first-byte char-bytes) - (cond ((= (bitwise-and byte #b11000000) #b11000000) + (cond ((= (bitwise-and byte #b11100000) #b11000000) (values (bitwise-and byte #b11111) 2)) - ((= (bitwise-and byte #b11100000) #b11100000) + ((= (bitwise-and byte #b11110000) #b11100000) (values (bitwise-and byte #b1111) 3)) - ((= (bitwise-and byte #b11110000) #b11110000) + ((= (bitwise-and byte #b11111000) #b11110000) (values (bitwise-and byte #b111) 4)) (else ;; Should not happen (values 0 0))))) (loop (cdr bytes) - (list byte) + bytes (arithmetic-shift first-byte 6) (sub1 char-bytes) res))))))))))