Finish ducking.

2023-07-07 14:42:57 +02:00 · 2023-07-07 14:42:57 +02:00 · c42582a799
commit c42582a799
parent 1b18c99d3c
4 changed files with 107 additions and 31 deletions
--- a/doc/d-utils.md
+++ b/doc/d-utils.md
@ -609,3 +609,74 @@ Directory handling which didn't fit elsewhere.

 Makes sure given path exists and it is a directory. Throws an error
 if it exists and it is not a directory.
+
+## util-utf8 [module]
+
+    (import util-utf8)
+
+High-performance UTF-8 support.
+
+### utf8-char->string [procedure]
+
+    (utf8-char->string ch)
+
+Encodes given character as UTF-8.
+
+### make-utf8-string [procedure]
+
+    (make-utf8-string n
+                      (ch #\space))
+
+UTF-8 version of make-string.
+
+### string-append-utf8-char [procedure]
+
+    (string-append-utf8-char s
+                             ch)
+
+UTF-8 character append.
+
+### utf8-string->lists [procedure]
+
+    (utf8-string->lists str)
+
+Converts a UTF-8 string into two lists: list of UTF-8 characters
+of the string and a list of remaining bytes (as integers).
+
+### utf8-bytes->lists [procedure]
+
+    (utf8-bytes->lists chars)
+
+The same as above but accepts a list of bytes (as integers).
+
+### utf8-string-next-char [procedure]
+
+    (utf8-string-next-char str
+                           (si0 0))
+
+Returns the position right after the character at specified
+position.
+
+### utf8-string-length [procedure]
+
+    (utf8-string-length s)
+
+Calculates the length of given UTF-8 string.
+
+### utf8-string->list [procedure]
+
+    (utf8-string->list s)
+
+Converts utf8 string to list of unicode characters.
+
+### list->utf8-string [procedure]
+
+    (list->utf8-string lst)
+
+Converts list of unicode characters into utf8 string.
+
+### string-utf8? [procedure]
+
+    (string-utf8? s)
+
+Returns true, if given string contains UTF-8 characters.
--- a/src/Makefile
+++ b/src/Makefile
@ -65,14 +65,15 @@ GENDOC-SOURCES=gendoc.scm duck-extract.import.scm			\
 	util-proc.import.scm util-format.import.scm			\
 	util-tag.import.scm util-string.import.scm			\
 	util-bst.import.scm util-bst-bdict.import.scm			\
-	util-bst-ldict.import.scm util-dir.import.scm
+	util-bst-ldict.import.scm util-dir.import.scm			\
+	util-utf8.import.scm

 GENDOC-OBJS=gendoc.o duck-extract.o util-time.o util-csv.o util-io.o	\
 	progress.o testing.o util-proc.o util-git.o util-io.o		\
 	util-stdout.o util-parser.o util-list.o util-proc.o		\
 	util-format.o racket-kwargs.o util-bst-ldict.o util-tag.o	\
 	duck.o util-string.o util-bst.o util-bst-bdict.o		\
-	util-bst-ldict.o util-dir.o
+	util-bst-ldict.o util-dir.o util-utf8.o

 .PHONY: imports
 imports: $(HACKERBASE-DEPS)
@ -444,7 +445,8 @@ SGR-STATE-SOURCES=sgr-state.scm testing.import.scm	\
 sgr-state.o: sgr-state.import.scm
 sgr-state.import.scm: $(SGR-STATE-SOURCES)

-UTIL-UTF8-SOURCES=util-utf8.scm testing.import.scm
+UTIL-UTF8-SOURCES=util-utf8.scm testing.import.scm duck.import.scm	\
+	racket-kwargs.import.scm

 util-utf8.o: util-utf8.import.scm
 util-utf8.import.scm: $(UTIL-UTF8-SOURCES)
--- a/src/gendoc.scm
+++ b/src/gendoc.scm
@ -37,4 +37,5 @@
 	  util-bst-ldict
 	  util-bst-lset
 	  util-dir
+	  util-utf8
 	  )
--- a/src/util-utf8.scm
+++ b/src/util-utf8.scm
@ -24,8 +24,11 @@

 (declare (unit util-utf8))

-(module
+(import duck)
+
+(module*
 util-utf8
+ #:doc ("High-performance UTF-8 support.")
 (
  utf8-char->string
  
@ -52,10 +55,11 @@
 (import scheme
 	 (chicken base)
 	 (chicken bitwise)
-	 testing)
+	 testing
+	 racket-kwargs)

- ;; Encodes given character as UTF-8
- (define (utf8-char->string ch)
+ (define/doc (utf8-char->string ch)
+   ("Encodes given character as UTF-8.")
   (let ((n (char->integer ch)))
     (cond ((>= n #x10000)
 	    (let ((r (make-string 4))
@ -87,10 +91,9 @@
 	   (else
 	    (make-string 1 ch)))))

- ;; UTF-8 version of make-string
- (define (make-utf8-string n . chs)
-   (let* ((ch (if (null? chs) #\space (car chs)))
-	  (s (utf8-char->string ch))
+ (define*/doc (make-utf8-string n (ch #\space))
+   ("UTF-8 version of make-string.")
+   (let* ((s (utf8-char->string ch))
 	  (sl (string-length s))
 	  (rl (* n sl))
 	  (r (make-string rl)))
@ -103,19 +106,19 @@
 	       (loop (add1 ri)
 		     (if (= nsi sl) 0 nsi)))))))

- ;; UTF-8 character append
- (define (string-append-utf8-char s ch)
+ (define/doc (string-append-utf8-char s ch)
+   ("UTF-8 character append.")
   (string-append s (utf8-char->string ch)))

- ;; Converts a UTF-8 string into two lists: list of UTF-8 characters
- ;; of the string and a list of remaining bytes (as integers).
- (define (utf8-string->lists str)
+ (define/doc (utf8-string->lists str)
+   ("Converts a UTF-8 string into two lists: list of UTF-8 characters
+of the string and a list of remaining bytes (as integers).")
   (utf8-bytes->lists
    (map char->integer
 	 (string->list str))))

- ;; The same as above but accepts a list of bytes (as integers)
- (define (utf8-bytes->lists chars)
+ (define/doc (utf8-bytes->lists chars)
+   ("The same as above but accepts a list of bytes (as integers).")
   (let loop ((bytes chars)
 	      (rpending '())
 	      (pending 0)
@ -175,11 +178,10 @@
 				 (sub1 char-bytes)
 				 res))))))))))

- ;; Returns the position right after the character at specified
- ;; position.
- (define (utf8-string-next-char str . sis)
-   (let ((len (string-length str))
-	 (si0 (if (null? sis) 0 (car sis))))
+ (define*/doc (utf8-string-next-char str (si0 0))
+   ("Returns the position right after the character at specified
+position.")
+   (let ((len (string-length str)))
     (let loop ((si si0)
 		(pc 0))
       (if (or (= si len)
@ -200,8 +202,8 @@
 			   (sub1 pc)
 			   0))))))))

- ;; Calculates the length of given UTF-8 string
- (define (utf8-string-length s)
+ (define/doc (utf8-string-length s)
+   ("Calculates the length of given UTF-8 string.")
   (let ((l (string-length s)))
     (let loop ((si 0)
 		(ci 0)
@ -226,8 +228,8 @@
 			   (sub1 pc)
 			   0))))))))

- ;; Converts utf8 string to list of unicode characters
- (define (utf8-string->list s)
+ (define/doc (utf8-string->list s)
+   ("Converts utf8 string to list of unicode characters.")
   (let-values (((lst _) (utf8-string->lists s)))
     lst))

@ -265,8 +267,8 @@
 	   (else
 	    (cons ch lst)))))

- ;; Converts list of unicode characters into utf8 string
- (define (list->utf8-string lst)
+ (define/doc (list->utf8-string lst)
+   ("Converts list of unicode characters into utf8 string.")
   (let loop ((lst lst)
 	      (res '()))
     (if (null? lst)
@ -274,8 +276,8 @@
 	 (loop (cdr lst)
 	       (prepend-unicode-char-to-utf8-list (car lst) res)))))

- ;; Returns true, if given string contains UTF-8 characters
- (define (string-utf8? s)
+ (define/doc (string-utf8? s)
+   ("Returns true, if given string contains UTF-8 characters.")
   (let ((asciilen (string-length s))
 	 (utf8len (utf8-string-length s)))
     (not (= asciilen utf8len))))