Finish ducking.
This commit is contained in:
parent
1b18c99d3c
commit
c42582a799
4 changed files with 107 additions and 31 deletions
|
@ -609,3 +609,74 @@ Directory handling which didn't fit elsewhere.
|
||||||
|
|
||||||
Makes sure given path exists and it is a directory. Throws an error
|
Makes sure given path exists and it is a directory. Throws an error
|
||||||
if it exists and it is not a directory.
|
if it exists and it is not a directory.
|
||||||
|
|
||||||
|
## util-utf8 [module]
|
||||||
|
|
||||||
|
(import util-utf8)
|
||||||
|
|
||||||
|
High-performance UTF-8 support.
|
||||||
|
|
||||||
|
### utf8-char->string [procedure]
|
||||||
|
|
||||||
|
(utf8-char->string ch)
|
||||||
|
|
||||||
|
Encodes given character as UTF-8.
|
||||||
|
|
||||||
|
### make-utf8-string [procedure]
|
||||||
|
|
||||||
|
(make-utf8-string n
|
||||||
|
(ch #\space))
|
||||||
|
|
||||||
|
UTF-8 version of make-string.
|
||||||
|
|
||||||
|
### string-append-utf8-char [procedure]
|
||||||
|
|
||||||
|
(string-append-utf8-char s
|
||||||
|
ch)
|
||||||
|
|
||||||
|
UTF-8 character append.
|
||||||
|
|
||||||
|
### utf8-string->lists [procedure]
|
||||||
|
|
||||||
|
(utf8-string->lists str)
|
||||||
|
|
||||||
|
Converts a UTF-8 string into two lists: list of UTF-8 characters
|
||||||
|
of the string and a list of remaining bytes (as integers).
|
||||||
|
|
||||||
|
### utf8-bytes->lists [procedure]
|
||||||
|
|
||||||
|
(utf8-bytes->lists chars)
|
||||||
|
|
||||||
|
The same as above but accepts a list of bytes (as integers).
|
||||||
|
|
||||||
|
### utf8-string-next-char [procedure]
|
||||||
|
|
||||||
|
(utf8-string-next-char str
|
||||||
|
(si0 0))
|
||||||
|
|
||||||
|
Returns the position right after the character at specified
|
||||||
|
position.
|
||||||
|
|
||||||
|
### utf8-string-length [procedure]
|
||||||
|
|
||||||
|
(utf8-string-length s)
|
||||||
|
|
||||||
|
Calculates the length of given UTF-8 string.
|
||||||
|
|
||||||
|
### utf8-string->list [procedure]
|
||||||
|
|
||||||
|
(utf8-string->list s)
|
||||||
|
|
||||||
|
Converts utf8 string to list of unicode characters.
|
||||||
|
|
||||||
|
### list->utf8-string [procedure]
|
||||||
|
|
||||||
|
(list->utf8-string lst)
|
||||||
|
|
||||||
|
Converts list of unicode characters into utf8 string.
|
||||||
|
|
||||||
|
### string-utf8? [procedure]
|
||||||
|
|
||||||
|
(string-utf8? s)
|
||||||
|
|
||||||
|
Returns true, if given string contains UTF-8 characters.
|
||||||
|
|
|
@ -65,14 +65,15 @@ GENDOC-SOURCES=gendoc.scm duck-extract.import.scm \
|
||||||
util-proc.import.scm util-format.import.scm \
|
util-proc.import.scm util-format.import.scm \
|
||||||
util-tag.import.scm util-string.import.scm \
|
util-tag.import.scm util-string.import.scm \
|
||||||
util-bst.import.scm util-bst-bdict.import.scm \
|
util-bst.import.scm util-bst-bdict.import.scm \
|
||||||
util-bst-ldict.import.scm util-dir.import.scm
|
util-bst-ldict.import.scm util-dir.import.scm \
|
||||||
|
util-utf8.import.scm
|
||||||
|
|
||||||
GENDOC-OBJS=gendoc.o duck-extract.o util-time.o util-csv.o util-io.o \
|
GENDOC-OBJS=gendoc.o duck-extract.o util-time.o util-csv.o util-io.o \
|
||||||
progress.o testing.o util-proc.o util-git.o util-io.o \
|
progress.o testing.o util-proc.o util-git.o util-io.o \
|
||||||
util-stdout.o util-parser.o util-list.o util-proc.o \
|
util-stdout.o util-parser.o util-list.o util-proc.o \
|
||||||
util-format.o racket-kwargs.o util-bst-ldict.o util-tag.o \
|
util-format.o racket-kwargs.o util-bst-ldict.o util-tag.o \
|
||||||
duck.o util-string.o util-bst.o util-bst-bdict.o \
|
duck.o util-string.o util-bst.o util-bst-bdict.o \
|
||||||
util-bst-ldict.o util-dir.o
|
util-bst-ldict.o util-dir.o util-utf8.o
|
||||||
|
|
||||||
.PHONY: imports
|
.PHONY: imports
|
||||||
imports: $(HACKERBASE-DEPS)
|
imports: $(HACKERBASE-DEPS)
|
||||||
|
@ -444,7 +445,8 @@ SGR-STATE-SOURCES=sgr-state.scm testing.import.scm \
|
||||||
sgr-state.o: sgr-state.import.scm
|
sgr-state.o: sgr-state.import.scm
|
||||||
sgr-state.import.scm: $(SGR-STATE-SOURCES)
|
sgr-state.import.scm: $(SGR-STATE-SOURCES)
|
||||||
|
|
||||||
UTIL-UTF8-SOURCES=util-utf8.scm testing.import.scm
|
UTIL-UTF8-SOURCES=util-utf8.scm testing.import.scm duck.import.scm \
|
||||||
|
racket-kwargs.import.scm
|
||||||
|
|
||||||
util-utf8.o: util-utf8.import.scm
|
util-utf8.o: util-utf8.import.scm
|
||||||
util-utf8.import.scm: $(UTIL-UTF8-SOURCES)
|
util-utf8.import.scm: $(UTIL-UTF8-SOURCES)
|
||||||
|
|
|
@ -37,4 +37,5 @@
|
||||||
util-bst-ldict
|
util-bst-ldict
|
||||||
util-bst-lset
|
util-bst-lset
|
||||||
util-dir
|
util-dir
|
||||||
|
util-utf8
|
||||||
)
|
)
|
||||||
|
|
|
@ -24,8 +24,11 @@
|
||||||
|
|
||||||
(declare (unit util-utf8))
|
(declare (unit util-utf8))
|
||||||
|
|
||||||
(module
|
(import duck)
|
||||||
|
|
||||||
|
(module*
|
||||||
util-utf8
|
util-utf8
|
||||||
|
#:doc ("High-performance UTF-8 support.")
|
||||||
(
|
(
|
||||||
utf8-char->string
|
utf8-char->string
|
||||||
|
|
||||||
|
@ -52,10 +55,11 @@
|
||||||
(import scheme
|
(import scheme
|
||||||
(chicken base)
|
(chicken base)
|
||||||
(chicken bitwise)
|
(chicken bitwise)
|
||||||
testing)
|
testing
|
||||||
|
racket-kwargs)
|
||||||
|
|
||||||
;; Encodes given character as UTF-8
|
(define/doc (utf8-char->string ch)
|
||||||
(define (utf8-char->string ch)
|
("Encodes given character as UTF-8.")
|
||||||
(let ((n (char->integer ch)))
|
(let ((n (char->integer ch)))
|
||||||
(cond ((>= n #x10000)
|
(cond ((>= n #x10000)
|
||||||
(let ((r (make-string 4))
|
(let ((r (make-string 4))
|
||||||
|
@ -87,10 +91,9 @@
|
||||||
(else
|
(else
|
||||||
(make-string 1 ch)))))
|
(make-string 1 ch)))))
|
||||||
|
|
||||||
;; UTF-8 version of make-string
|
(define*/doc (make-utf8-string n (ch #\space))
|
||||||
(define (make-utf8-string n . chs)
|
("UTF-8 version of make-string.")
|
||||||
(let* ((ch (if (null? chs) #\space (car chs)))
|
(let* ((s (utf8-char->string ch))
|
||||||
(s (utf8-char->string ch))
|
|
||||||
(sl (string-length s))
|
(sl (string-length s))
|
||||||
(rl (* n sl))
|
(rl (* n sl))
|
||||||
(r (make-string rl)))
|
(r (make-string rl)))
|
||||||
|
@ -103,19 +106,19 @@
|
||||||
(loop (add1 ri)
|
(loop (add1 ri)
|
||||||
(if (= nsi sl) 0 nsi)))))))
|
(if (= nsi sl) 0 nsi)))))))
|
||||||
|
|
||||||
;; UTF-8 character append
|
(define/doc (string-append-utf8-char s ch)
|
||||||
(define (string-append-utf8-char s ch)
|
("UTF-8 character append.")
|
||||||
(string-append s (utf8-char->string ch)))
|
(string-append s (utf8-char->string ch)))
|
||||||
|
|
||||||
;; Converts a UTF-8 string into two lists: list of UTF-8 characters
|
(define/doc (utf8-string->lists str)
|
||||||
;; of the string and a list of remaining bytes (as integers).
|
("Converts a UTF-8 string into two lists: list of UTF-8 characters
|
||||||
(define (utf8-string->lists str)
|
of the string and a list of remaining bytes (as integers).")
|
||||||
(utf8-bytes->lists
|
(utf8-bytes->lists
|
||||||
(map char->integer
|
(map char->integer
|
||||||
(string->list str))))
|
(string->list str))))
|
||||||
|
|
||||||
;; The same as above but accepts a list of bytes (as integers)
|
(define/doc (utf8-bytes->lists chars)
|
||||||
(define (utf8-bytes->lists chars)
|
("The same as above but accepts a list of bytes (as integers).")
|
||||||
(let loop ((bytes chars)
|
(let loop ((bytes chars)
|
||||||
(rpending '())
|
(rpending '())
|
||||||
(pending 0)
|
(pending 0)
|
||||||
|
@ -175,11 +178,10 @@
|
||||||
(sub1 char-bytes)
|
(sub1 char-bytes)
|
||||||
res))))))))))
|
res))))))))))
|
||||||
|
|
||||||
;; Returns the position right after the character at specified
|
(define*/doc (utf8-string-next-char str (si0 0))
|
||||||
;; position.
|
("Returns the position right after the character at specified
|
||||||
(define (utf8-string-next-char str . sis)
|
position.")
|
||||||
(let ((len (string-length str))
|
(let ((len (string-length str)))
|
||||||
(si0 (if (null? sis) 0 (car sis))))
|
|
||||||
(let loop ((si si0)
|
(let loop ((si si0)
|
||||||
(pc 0))
|
(pc 0))
|
||||||
(if (or (= si len)
|
(if (or (= si len)
|
||||||
|
@ -200,8 +202,8 @@
|
||||||
(sub1 pc)
|
(sub1 pc)
|
||||||
0))))))))
|
0))))))))
|
||||||
|
|
||||||
;; Calculates the length of given UTF-8 string
|
(define/doc (utf8-string-length s)
|
||||||
(define (utf8-string-length s)
|
("Calculates the length of given UTF-8 string.")
|
||||||
(let ((l (string-length s)))
|
(let ((l (string-length s)))
|
||||||
(let loop ((si 0)
|
(let loop ((si 0)
|
||||||
(ci 0)
|
(ci 0)
|
||||||
|
@ -226,8 +228,8 @@
|
||||||
(sub1 pc)
|
(sub1 pc)
|
||||||
0))))))))
|
0))))))))
|
||||||
|
|
||||||
;; Converts utf8 string to list of unicode characters
|
(define/doc (utf8-string->list s)
|
||||||
(define (utf8-string->list s)
|
("Converts utf8 string to list of unicode characters.")
|
||||||
(let-values (((lst _) (utf8-string->lists s)))
|
(let-values (((lst _) (utf8-string->lists s)))
|
||||||
lst))
|
lst))
|
||||||
|
|
||||||
|
@ -265,8 +267,8 @@
|
||||||
(else
|
(else
|
||||||
(cons ch lst)))))
|
(cons ch lst)))))
|
||||||
|
|
||||||
;; Converts list of unicode characters into utf8 string
|
(define/doc (list->utf8-string lst)
|
||||||
(define (list->utf8-string lst)
|
("Converts list of unicode characters into utf8 string.")
|
||||||
(let loop ((lst lst)
|
(let loop ((lst lst)
|
||||||
(res '()))
|
(res '()))
|
||||||
(if (null? lst)
|
(if (null? lst)
|
||||||
|
@ -274,8 +276,8 @@
|
||||||
(loop (cdr lst)
|
(loop (cdr lst)
|
||||||
(prepend-unicode-char-to-utf8-list (car lst) res)))))
|
(prepend-unicode-char-to-utf8-list (car lst) res)))))
|
||||||
|
|
||||||
;; Returns true, if given string contains UTF-8 characters
|
(define/doc (string-utf8? s)
|
||||||
(define (string-utf8? s)
|
("Returns true, if given string contains UTF-8 characters.")
|
||||||
(let ((asciilen (string-length s))
|
(let ((asciilen (string-length s))
|
||||||
(utf8len (utf8-string-length s)))
|
(utf8len (utf8-string-length s)))
|
||||||
(not (= asciilen utf8len))))
|
(not (= asciilen utf8len))))
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue