Fix UTF-8 3-byte sequence handling.
This commit is contained in:
parent
ed55660c80
commit
dc3044026c
1 changed file with 9 additions and 9 deletions
|
@ -120,13 +120,13 @@ of the string and a list of remaining bytes (as integers).")
|
|||
(define/doc (utf8-bytes->lists chars)
|
||||
("The same as above but accepts a list of bytes (as integers).")
|
||||
(let loop ((bytes chars)
|
||||
(rpending '())
|
||||
(rpending chars)
|
||||
(pending 0)
|
||||
(expected #f)
|
||||
(res '()))
|
||||
(if (null? bytes)
|
||||
(values (reverse res)
|
||||
(reverse rpending))
|
||||
rpending)
|
||||
(let ((byte (car bytes)))
|
||||
(cond (expected
|
||||
;; Decode UTF-8 sequence
|
||||
|
@ -135,14 +135,14 @@ of the string and a list of remaining bytes (as integers).")
|
|||
(let ((char (integer->char (bitwise-ior pending
|
||||
(bitwise-and byte #b111111)))))
|
||||
(loop (cdr bytes)
|
||||
'()
|
||||
(cdr bytes)
|
||||
0
|
||||
#f
|
||||
(cons char res))))
|
||||
(else
|
||||
;; Intermediate bytes
|
||||
(loop (cdr bytes)
|
||||
(cons byte rpending)
|
||||
rpending
|
||||
(arithmetic-shift (bitwise-ior pending
|
||||
(bitwise-and byte #b111111)) 6)
|
||||
(sub1 expected)
|
||||
|
@ -152,7 +152,7 @@ of the string and a list of remaining bytes (as integers).")
|
|||
(cond ((= (bitwise-and byte #b10000000) 0)
|
||||
;; ASCII
|
||||
(loop (cdr bytes)
|
||||
'()
|
||||
(cdr bytes)
|
||||
0
|
||||
#f
|
||||
(cons (integer->char byte) res)))
|
||||
|
@ -160,20 +160,20 @@ of the string and a list of remaining bytes (as integers).")
|
|||
;; First byte of UTF-8 sequence
|
||||
(let-values
|
||||
(((first-byte char-bytes)
|
||||
(cond ((= (bitwise-and byte #b11000000) #b11000000)
|
||||
(cond ((= (bitwise-and byte #b11100000) #b11000000)
|
||||
(values (bitwise-and byte #b11111)
|
||||
2))
|
||||
((= (bitwise-and byte #b11100000) #b11100000)
|
||||
((= (bitwise-and byte #b11110000) #b11100000)
|
||||
(values (bitwise-and byte #b1111)
|
||||
3))
|
||||
((= (bitwise-and byte #b11110000) #b11110000)
|
||||
((= (bitwise-and byte #b11111000) #b11110000)
|
||||
(values (bitwise-and byte #b111)
|
||||
4))
|
||||
(else
|
||||
;; Should not happen
|
||||
(values 0 0)))))
|
||||
(loop (cdr bytes)
|
||||
(list byte)
|
||||
bytes
|
||||
(arithmetic-shift first-byte 6)
|
||||
(sub1 char-bytes)
|
||||
res))))))))))
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue