Fix UTF-8 3-byte handling.
This commit is contained in:
parent
ed55660c80
commit
dc3044026c
1 changed file with 9 additions and 9 deletions
|
@ -120,13 +120,13 @@ of the string and a list of remaining bytes (as integers).")
|
||||||
(define/doc (utf8-bytes->lists chars)
|
(define/doc (utf8-bytes->lists chars)
|
||||||
("The same as above but accepts a list of bytes (as integers).")
|
("The same as above but accepts a list of bytes (as integers).")
|
||||||
(let loop ((bytes chars)
|
(let loop ((bytes chars)
|
||||||
(rpending '())
|
(rpending chars)
|
||||||
(pending 0)
|
(pending 0)
|
||||||
(expected #f)
|
(expected #f)
|
||||||
(res '()))
|
(res '()))
|
||||||
(if (null? bytes)
|
(if (null? bytes)
|
||||||
(values (reverse res)
|
(values (reverse res)
|
||||||
(reverse rpending))
|
rpending)
|
||||||
(let ((byte (car bytes)))
|
(let ((byte (car bytes)))
|
||||||
(cond (expected
|
(cond (expected
|
||||||
;; Decode UTF-8 sequence
|
;; Decode UTF-8 sequence
|
||||||
|
@ -135,14 +135,14 @@ of the string and a list of remaining bytes (as integers).")
|
||||||
(let ((char (integer->char (bitwise-ior pending
|
(let ((char (integer->char (bitwise-ior pending
|
||||||
(bitwise-and byte #b111111)))))
|
(bitwise-and byte #b111111)))))
|
||||||
(loop (cdr bytes)
|
(loop (cdr bytes)
|
||||||
'()
|
(cdr bytes)
|
||||||
0
|
0
|
||||||
#f
|
#f
|
||||||
(cons char res))))
|
(cons char res))))
|
||||||
(else
|
(else
|
||||||
;; Intermediate bytes
|
;; Intermediate bytes
|
||||||
(loop (cdr bytes)
|
(loop (cdr bytes)
|
||||||
(cons byte rpending)
|
rpending
|
||||||
(arithmetic-shift (bitwise-ior pending
|
(arithmetic-shift (bitwise-ior pending
|
||||||
(bitwise-and byte #b111111)) 6)
|
(bitwise-and byte #b111111)) 6)
|
||||||
(sub1 expected)
|
(sub1 expected)
|
||||||
|
@ -152,7 +152,7 @@ of the string and a list of remaining bytes (as integers).")
|
||||||
(cond ((= (bitwise-and byte #b10000000) 0)
|
(cond ((= (bitwise-and byte #b10000000) 0)
|
||||||
;; ASCII
|
;; ASCII
|
||||||
(loop (cdr bytes)
|
(loop (cdr bytes)
|
||||||
'()
|
(cdr bytes)
|
||||||
0
|
0
|
||||||
#f
|
#f
|
||||||
(cons (integer->char byte) res)))
|
(cons (integer->char byte) res)))
|
||||||
|
@ -160,20 +160,20 @@ of the string and a list of remaining bytes (as integers).")
|
||||||
;; First byte of UTF-8 sequence
|
;; First byte of UTF-8 sequence
|
||||||
(let-values
|
(let-values
|
||||||
(((first-byte char-bytes)
|
(((first-byte char-bytes)
|
||||||
(cond ((= (bitwise-and byte #b11000000) #b11000000)
|
(cond ((= (bitwise-and byte #b11100000) #b11000000)
|
||||||
(values (bitwise-and byte #b11111)
|
(values (bitwise-and byte #b11111)
|
||||||
2))
|
2))
|
||||||
((= (bitwise-and byte #b11100000) #b11100000)
|
((= (bitwise-and byte #b11110000) #b11100000)
|
||||||
(values (bitwise-and byte #b1111)
|
(values (bitwise-and byte #b1111)
|
||||||
3))
|
3))
|
||||||
((= (bitwise-and byte #b11110000) #b11110000)
|
((= (bitwise-and byte #b11111000) #b11110000)
|
||||||
(values (bitwise-and byte #b111)
|
(values (bitwise-and byte #b111)
|
||||||
4))
|
4))
|
||||||
(else
|
(else
|
||||||
;; Should not happen
|
;; Should not happen
|
||||||
(values 0 0)))))
|
(values 0 0)))))
|
||||||
(loop (cdr bytes)
|
(loop (cdr bytes)
|
||||||
(list byte)
|
bytes
|
||||||
(arithmetic-shift first-byte 6)
|
(arithmetic-shift first-byte 6)
|
||||||
(sub1 char-bytes)
|
(sub1 char-bytes)
|
||||||
res))))))))))
|
res))))))))))
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue