Duck util-string.

This commit is contained in:
Dominik Pantůček 2023-07-06 19:47:17 +02:00
parent 3833196533
commit 722ac4830c
5 changed files with 58 additions and 56 deletions

View file

@ -371,3 +371,37 @@ A unifying module for compound data structures tagging.
Creates a unique and collision free symbol to identify compound data Creates a unique and collision free symbol to identify compound data
structures based on lists and pairs. structures based on lists and pairs.
## util-string [module]
(import util-string)
String manipulation functions which are used throughout other modules.
### string-first+rest [procedure]
(string-first+rest str)
* ```str``` - a string to split
Returns a pair of strings where the ```car``` of the pair is the first
token in the ```str``` given and ```cdr``` is a string with the
remainder with leading whitespace removed.
### string->qp [procedure]
(string->qp str)
* ```str``` - arbitrary string
Returns a new string with all non-ASCII characters encoded as
quoted-printable sequences.
### string-upcase [procedure]
(string-upcase str)
* ```str``` - arbitrary string
Returns the ```str``` with all characters converted to upper case
using ```char-upcase```. Does not work with UTF-8.

View file

@ -243,52 +243,3 @@ Returns a new lset instance from ```ls1``` with all elements in
* ```ls2``` - lset instance * ```ls2``` - lset instance
Returns true if the sets contain exactly the same values. Returns true if the sets contain exactly the same values.
### String
(import util-string)
String manipulation functions which are used throughout other modules.
(string-repeat str rep)
* ```str``` - string to repeat
* ```rep``` - number of repeats
Returns a string created by repeating the string ```str``` exactly
```rep``` times.
(string-first+rest str)
* ```str``` - a string to split
Returns a pair of strings where the ```car``` of the pair is the first
token in the ```str``` given and ```cdr``` is a string with the
remainder with leading whitespace removed.
(string-utf8? str)
* ```str``` - arbitrary string
Returns ```#t``` if the given string ```str``` contains UTF-8 characters.
(string->list/utf8 str)
* ```str``` - arbitrary string
Returns a list of strings representing individual (possibly UTF-8)
characters of the string.
(string->qp str)
* ```str``` - arbitrary string
Returns a new string with all non-ASCII characters encoded as
quoted-printable sequences.
(string-upcase str)
* ```str``` - arbitrary string
Returns the ```str``` with all characters converted to upper case
using ```char-upcase```. Does not work with UTF-8.

View file

@ -62,13 +62,13 @@ GENDOC-SOURCES=gendoc.scm duck-extract.import.scm \
util-io.import.scm util-stdout.import.scm \ util-io.import.scm util-stdout.import.scm \
util-parser.import.scm util-list.import.scm \ util-parser.import.scm util-list.import.scm \
util-proc.import.scm util-format.import.scm \ util-proc.import.scm util-format.import.scm \
util-tag.import.scm util-tag.import.scm util-string.import.scm
GENDOC-OBJS=gendoc.o duck-extract.o util-time.o util-csv.o util-io.o \ GENDOC-OBJS=gendoc.o duck-extract.o util-time.o util-csv.o util-io.o \
progress.o testing.o util-proc.o util-git.o util-io.o \ progress.o testing.o util-proc.o util-git.o util-io.o \
util-stdout.o util-parser.o util-list.o util-proc.o \ util-stdout.o util-parser.o util-list.o util-proc.o \
util-format.o racket-kwargs.o util-dict-list.o util-tag.o \ util-format.o racket-kwargs.o util-dict-list.o util-tag.o \
util-set-list.o duck.o util-set-list.o duck.o util-string.o
.PHONY: imports .PHONY: imports
imports: $(HACKERBASE-DEPS) imports: $(HACKERBASE-DEPS)
@ -289,7 +289,7 @@ util-io.o: util-io.import.scm
util-io.import.scm: $(UTIL-IO-SOURCES) util-io.import.scm: $(UTIL-IO-SOURCES)
UTIL-STRING-SOURCES=util-string.scm testing.import.scm \ UTIL-STRING-SOURCES=util-string.scm testing.import.scm \
util-utf8.import.scm util-utf8.import.scm duck.import.scm
util-string.o: util-string.import.scm util-string.o: util-string.import.scm
util-string.import.scm: $(UTIL-STRING-SOURCES) util-string.import.scm: $(UTIL-STRING-SOURCES)

View file

@ -30,4 +30,5 @@
util-proc util-proc
util-format util-format
util-tag util-tag
util-string
) )

View file

@ -25,8 +25,11 @@
(declare (unit util-string)) (declare (unit util-string))
(module (import duck)
(module*
util-string util-string
#:doc ("String manipulation functions which are used throughout other modules.")
( (
string-first+rest string-first+rest
@ -45,7 +48,12 @@
util-utf8) util-utf8)
;; Extracts first token and the rest as separate string ;; Extracts first token and the rest as separate string
(define (string-first+rest str) (define/doc (string-first+rest str)
("* ```str``` - a string to split
Returns a pair of strings where the ```car``` of the pair is the first
token in the ```str``` given and ```cdr``` is a string with the
remainder with leading whitespace removed.")
(let ((dm (irregex-search (irregex "[ \\t]" 'u) str))) (let ((dm (irregex-search (irregex "[ \\t]" 'u) str)))
(if dm (if dm
(let* ((sep-idx (irregex-match-start-index dm)) (let* ((sep-idx (irregex-match-start-index dm))
@ -56,7 +64,11 @@
(cons str "")))) (cons str ""))))
;; Encodes given UTF-8 string as quoted-printable ;; Encodes given UTF-8 string as quoted-printable
(define (string->qp str) (define/doc (string->qp str)
("* ```str``` - arbitrary string
Returns a new string with all non-ASCII characters encoded as
quoted-printable sequences.")
(let loop ((lst (utf8-string->list str)) (let loop ((lst (utf8-string->list str))
(res '())) (res '()))
(if (null? lst) (if (null? lst)
@ -82,7 +94,11 @@
res))))) res)))))
;; Returns upper-case version of the string ;; Returns upper-case version of the string
(define (string-upcase str) (define/doc (string-upcase str)
("* ```str``` - arbitrary string
Returns the ```str``` with all characters converted to upper case
using ```char-upcase```. Does not work with UTF-8.")
(list->string (list->string
(map char-upcase (map char-upcase
(string->list str)))) (string->list str))))