Duck util-string.

This commit is contained in:
Dominik Pantůček 2023-07-06 19:47:17 +02:00
parent 3833196533
commit 722ac4830c
5 changed files with 58 additions and 56 deletions

View file

@ -371,3 +371,37 @@ A unifying module for compound data structures tagging.
Creates a unique, collision-free symbol to identify compound data
structures based on lists and pairs.
## util-string [module]
(import util-string)
String manipulation functions which are used throughout other modules.
### string-first+rest [procedure]
(string-first+rest str)
* ```str``` - a string to split
Returns a pair of strings: the ```car``` is the first token of
```str``` and the ```cdr``` is the remainder of ```str``` with leading
whitespace removed. If ```str``` contains no space or tab, the
```cdr``` is the empty string.
### string->qp [procedure]
(string->qp str)
* ```str``` - arbitrary string
Returns a new string with all non-ASCII characters encoded as
quoted-printable sequences.
### string-upcase [procedure]
(string-upcase str)
* ```str``` - arbitrary string
Returns a new string with every character of ```str``` converted to
upper case using ```char-upcase```. Not UTF-8 aware: strings containing
non-ASCII (UTF-8 encoded) characters are not supported.

View file

@ -243,52 +243,3 @@ Returns a new lset instance from ```ls1``` with all elements in
* ```ls2``` - lset instance
Returns true if the sets contain exactly the same values.
### String
(import util-string)
String manipulation functions which are used throughout other modules.
(string-repeat str rep)
* ```str``` - string to repeat
* ```rep``` - number of repeats
Returns a string created by repeating the string ```str``` exactly
```rep``` times.
(string-first+rest str)
* ```str``` - a string to split
Returns a pair of strings where the ```car``` of the pair is the first
token in the ```str``` given and ```cdr``` is a string with the
remainder with leading whitespace removed.
(string-utf8? str)
* ```str``` - arbitrary string
Returns ```#t``` if the given string ```str``` contains UTF-8 characters.
(string->list/utf8 str)
* ```str``` - arbitrary string
Returns a list of strings representing individual (possibly UTF-8)
characters of the string.
(string->qp str)
* ```str``` - arbitrary string
Returns a new string with all non-ASCII characters encoded as
quoted-printable sequences.
(string-upcase str)
* ```str``` - arbitrary string
Returns the ```str``` with all characters converted to upper case
using ```char-upcase```. Does not work with UTF-8.

View file

@ -62,13 +62,13 @@ GENDOC-SOURCES=gendoc.scm duck-extract.import.scm \
util-io.import.scm util-stdout.import.scm \
util-parser.import.scm util-list.import.scm \
util-proc.import.scm util-format.import.scm \
util-tag.import.scm
util-tag.import.scm util-string.import.scm
GENDOC-OBJS=gendoc.o duck-extract.o util-time.o util-csv.o util-io.o \
progress.o testing.o util-proc.o util-git.o util-io.o \
util-stdout.o util-parser.o util-list.o util-proc.o \
util-format.o racket-kwargs.o util-dict-list.o util-tag.o \
util-set-list.o duck.o
util-set-list.o duck.o util-string.o
.PHONY: imports
imports: $(HACKERBASE-DEPS)
@ -289,7 +289,7 @@ util-io.o: util-io.import.scm
util-io.import.scm: $(UTIL-IO-SOURCES)
UTIL-STRING-SOURCES=util-string.scm testing.import.scm \
util-utf8.import.scm
util-utf8.import.scm duck.import.scm
util-string.o: util-string.import.scm
util-string.import.scm: $(UTIL-STRING-SOURCES)

View file

@ -30,4 +30,5 @@
util-proc
util-format
util-tag
util-string
)

View file

@ -25,8 +25,11 @@
(declare (unit util-string))
(module
(import duck)
(module*
util-string
#:doc ("String manipulation functions which are used throughout other modules.")
(
string-first+rest
@ -45,7 +48,12 @@
util-utf8)
;; Extracts first token and the rest as separate string
(define (string-first+rest str)
(define/doc (string-first+rest str)
("* ```str``` - a string to split
Returns a pair of strings where the ```car``` of the pair is the first
token in the ```str``` given and ```cdr``` is a string with the
remainder with leading whitespace removed.")
(let ((dm (irregex-search (irregex "[ \\t]" 'u) str)))
(if dm
(let* ((sep-idx (irregex-match-start-index dm))
@ -56,7 +64,11 @@
(cons str ""))))
;; Encodes given UTF-8 string as quoted-printable
(define (string->qp str)
(define/doc (string->qp str)
("* ```str``` - arbitrary string
Returns a new string with all non-ASCII characters encoded as
quoted-printable sequences.")
(let loop ((lst (utf8-string->list str))
(res '()))
(if (null? lst)
@ -82,7 +94,11 @@
res)))))
;; Returns upper-case version of the string
(define (string-upcase str)
(define/doc (string-upcase str)
("* ```str``` - arbitrary string
Returns the ```str``` with all characters converted to upper case
using ```char-upcase```. Does not work with UTF-8.")
(list->string
(map char-upcase
(string->list str))))