From 722ac4830c9198efd5bb4edd491e21d5d84eec92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dominik=20Pant=C5=AF=C4=8Dek?= Date: Thu, 6 Jul 2023 19:47:17 +0200 Subject: [PATCH] Duck util-string. --- doc/d-utils.md | 34 +++++++++++++++++++++++++++++++ doc/utils.md | 49 --------------------------------------------- src/Makefile | 6 +++--- src/gendoc.scm | 1 + src/util-string.scm | 24 ++++++++++++++++++---- 5 files changed, 58 insertions(+), 56 deletions(-) diff --git a/doc/d-utils.md b/doc/d-utils.md index 72afa0d..7e8532f 100644 --- a/doc/d-utils.md +++ b/doc/d-utils.md @@ -371,3 +371,37 @@ A unifying module for compound data structures tagging. Creates a unique and collision free symbol to identify compound data structures based on lists and pairs. + +## util-string [module] + + (import util-string) + +String manipulation functions which are used throughout other modules. + +### string-first+rest [procedure] + + (string-first+rest str) + +* ```str``` - a string to split + +Returns a pair of strings where the ```car``` of the pair is the first +token in the ```str``` given and ```cdr``` is a string with the +remainder with leading whitespace removed. + +### string->qp [procedure] + + (string->qp str) + +* ```str``` - arbitrary string + +Returns a new string with all non-ASCII characters encoded as +quoted-printable sequences. + +### string-upcase [procedure] + + (string-upcase str) + +* ```str``` - arbitrary string + +Returns the ```str``` with all characters converted to upper case +using ```char-upcase```. Does not work with UTF-8. diff --git a/doc/utils.md b/doc/utils.md index 86f65e5..52a657d 100644 --- a/doc/utils.md +++ b/doc/utils.md @@ -243,52 +243,3 @@ Returns a new lset instance from ```ls1``` with all elements in * ```ls2``` - lset instance Returns true if the sets contain exactly the same values. - -### String - - (import util-string) - -String manipulation functions which are used throughout other modules. - - (string-repeat str rep) - -* ```str``` - string to repeat -* ```rep``` - number of repeats - -Returns a string created by repeating the string ```str``` exactly -```rep``` number of times. - - (string-first+rest str) - -* ```str``` - a string to split - -Returns a pair of strings where the ```car``` of the pair is the first -token in the ```str``` given and ```cdr``` is a string with the -remainder with leading whitespace removed. - - (string-utf8? str) - -* ```str``` - arbitrary string - -Returns ```#t``` if given string ```str``` contains UTF-8 characters. - - (string->list/utf8 str) - -* ```str``` - arbitrary string - -Returns a list of strings representing individual (possibly UTF-8) -characters of the string. - - (string->qp str) - -* ```str``` - arbitrary string - -Returns a new string with all non-ASCII characters encoded as -quoted-printable sequences. - - (string-upcase str) - -* ```str``` - arbitrary string - -Returns the ```str``` with all characters converted to upper case -using ```char-upcase```. Does not work with UTF-8. diff --git a/src/Makefile b/src/Makefile index c6a37e8..9a46e35 100644 --- a/src/Makefile +++ b/src/Makefile @@ -62,13 +62,13 @@ GENDOC-SOURCES=gendoc.scm duck-extract.import.scm \ util-io.import.scm util-stdout.import.scm \ util-parser.import.scm util-list.import.scm \ util-proc.import.scm util-format.import.scm \ - util-tag.import.scm + util-tag.import.scm util-string.import.scm GENDOC-OBJS=gendoc.o duck-extract.o util-time.o util-csv.o util-io.o \ progress.o testing.o util-proc.o util-git.o util-io.o \ util-stdout.o util-parser.o util-list.o util-proc.o \ util-format.o racket-kwargs.o util-dict-list.o util-tag.o \ - util-set-list.o duck.o + util-set-list.o duck.o util-string.o .PHONY: imports imports: $(HACKERBASE-DEPS) @@ -289,7 +289,7 @@ util-io.o: util-io.import.scm util-io.import.scm: $(UTIL-IO-SOURCES) UTIL-STRING-SOURCES=util-string.scm testing.import.scm \ - util-utf8.import.scm + util-utf8.import.scm duck.import.scm util-string.o: util-string.import.scm util-string.import.scm: $(UTIL-STRING-SOURCES) diff --git a/src/gendoc.scm b/src/gendoc.scm index 515e06d..77bba1d 100644 --- a/src/gendoc.scm +++ b/src/gendoc.scm @@ -30,4 +30,5 @@ util-proc util-format util-tag + util-string ) diff --git a/src/util-string.scm b/src/util-string.scm index 21850f4..54c6ece 100644 --- a/src/util-string.scm +++ b/src/util-string.scm @@ -25,8 +25,11 @@ (declare (unit util-string)) -(module +(import duck) + +(module* util-string + #:doc ("String manipulation functions which are used throughout other modules.") ( string-first+rest @@ -45,7 +48,12 @@ util-utf8) ;; Extracts first token and the rest as separate string - (define (string-first+rest str) + (define/doc (string-first+rest str) + ("* ```str``` - a string to split + +Returns a pair of strings where the ```car``` of the pair is the first +token in the ```str``` given and ```cdr``` is a string with the +remainder with leading whitespace removed.") (let ((dm (irregex-search (irregex "[ \\t]" 'u) str))) (if dm (let* ((sep-idx (irregex-match-start-index dm)) @@ -56,7 +64,11 @@ (cons str "")))) ;; Encodes given UTF-8 string as quoted-printable - (define (string->qp str) + (define/doc (string->qp str) + ("* ```str``` - arbitrary string + +Returns a new string with all non-ASCII characters encoded as +quoted-printable sequences.") (let loop ((lst (utf8-string->list str)) (res '())) (if (null? lst) @@ -82,7 +94,11 @@ res))))) ;; Returns upper-case version of the string - (define (string-upcase str) + (define/doc (string-upcase str) + ("* ```str``` - arbitrary string + +Returns the ```str``` with all characters converted to upper case +using ```char-upcase```. Does not work with UTF-8.") (list->string (map char-upcase (string->list str))))