Duck util-string.
This commit is contained in:
parent
3833196533
commit
722ac4830c
5 changed files with 58 additions and 56 deletions
|
@ -371,3 +371,37 @@ A unifying module for compound data structures tagging.
|
||||||
|
|
||||||
Creates a unique and collision free symbol to identify compound data
|
Creates a unique and collision free symbol to identify compound data
|
||||||
structures based on lists and pairs.
|
structures based on lists and pairs.
|
||||||
|
|
||||||
|
## util-string [module]
|
||||||
|
|
||||||
|
(import util-string)
|
||||||
|
|
||||||
|
String manipulation functions which are used throughout other modules.
|
||||||
|
|
||||||
|
### string-first+rest [procedure]
|
||||||
|
|
||||||
|
(string-first+rest str)
|
||||||
|
|
||||||
|
* ```str``` - a string to split
|
||||||
|
|
||||||
|
Returns a pair of strings where the ```car``` of the pair is the first
|
||||||
|
token in the ```str``` given and ```cdr``` is a string with the
|
||||||
|
remainder with leading whitespace removed.
|
||||||
|
|
||||||
|
### string->qp [procedure]
|
||||||
|
|
||||||
|
(string->qp str)
|
||||||
|
|
||||||
|
* ```str``` - arbitrary string
|
||||||
|
|
||||||
|
Returns a new string with all non-ASCII characters encoded as
|
||||||
|
quoted-printable sequences.
|
||||||
|
|
||||||
|
### string-upcase [procedure]
|
||||||
|
|
||||||
|
(string-upcase str)
|
||||||
|
|
||||||
|
* ```str``` - arbitrary string
|
||||||
|
|
||||||
|
Returns the ```str``` with all characters converted to upper case
|
||||||
|
using ```char-upcase```. Does not work with UTF-8.
|
||||||
|
|
49
doc/utils.md
49
doc/utils.md
|
@ -243,52 +243,3 @@ Returns a new lset instance from ```ls1``` with all elements in
|
||||||
* ```ls2``` - lset instance
|
* ```ls2``` - lset instance
|
||||||
|
|
||||||
Returns true if the sets contain exactly the same values.
|
Returns true if the sets contain exactly the same values.
|
||||||
|
|
||||||
### String
|
|
||||||
|
|
||||||
(import util-string)
|
|
||||||
|
|
||||||
String manipulation functions which are used throughout other modules.
|
|
||||||
|
|
||||||
(string-repeat str rep)
|
|
||||||
|
|
||||||
* ```str``` - string to repeat
|
|
||||||
* ```rep``` - number of repeats
|
|
||||||
|
|
||||||
Returns a string created by repeating the string ```str``` exactly
|
|
||||||
```rep``` number of times.
|
|
||||||
|
|
||||||
(string-first+rest str)
|
|
||||||
|
|
||||||
* ```str``` - a string to split
|
|
||||||
|
|
||||||
Returns a pair of strings where the ```car``` of the pair is the first
|
|
||||||
token in the ```str``` given and ```cdr``` is a string with the
|
|
||||||
remainder with leading whitespace removed.
|
|
||||||
|
|
||||||
(string-utf8? str)
|
|
||||||
|
|
||||||
* ```str``` - arbitrary string
|
|
||||||
|
|
||||||
Returns ```#t``` if the given string ```str``` contains UTF-8 characters.
|
|
||||||
|
|
||||||
(string->list/utf8 str)
|
|
||||||
|
|
||||||
* ```str``` - arbitrary string
|
|
||||||
|
|
||||||
Returns a list of strings representing individual (possibly UTF-8)
|
|
||||||
characters of the string.
|
|
||||||
|
|
||||||
(string->qp str)
|
|
||||||
|
|
||||||
* ```str``` - arbitrary string
|
|
||||||
|
|
||||||
Returns a new string with all non-ASCII characters encoded as
|
|
||||||
quoted-printable sequences.
|
|
||||||
|
|
||||||
(string-upcase str)
|
|
||||||
|
|
||||||
* ```str``` - arbitrary string
|
|
||||||
|
|
||||||
Returns the ```str``` with all characters converted to upper case
|
|
||||||
using ```char-upcase```. Does not work with UTF-8.
|
|
||||||
|
|
|
@ -62,13 +62,13 @@ GENDOC-SOURCES=gendoc.scm duck-extract.import.scm \
|
||||||
util-io.import.scm util-stdout.import.scm \
|
util-io.import.scm util-stdout.import.scm \
|
||||||
util-parser.import.scm util-list.import.scm \
|
util-parser.import.scm util-list.import.scm \
|
||||||
util-proc.import.scm util-format.import.scm \
|
util-proc.import.scm util-format.import.scm \
|
||||||
util-tag.import.scm
|
util-tag.import.scm util-string.import.scm
|
||||||
|
|
||||||
GENDOC-OBJS=gendoc.o duck-extract.o util-time.o util-csv.o util-io.o \
|
GENDOC-OBJS=gendoc.o duck-extract.o util-time.o util-csv.o util-io.o \
|
||||||
progress.o testing.o util-proc.o util-git.o util-io.o \
|
progress.o testing.o util-proc.o util-git.o util-io.o \
|
||||||
util-stdout.o util-parser.o util-list.o util-proc.o \
|
util-stdout.o util-parser.o util-list.o util-proc.o \
|
||||||
util-format.o racket-kwargs.o util-dict-list.o util-tag.o \
|
util-format.o racket-kwargs.o util-dict-list.o util-tag.o \
|
||||||
util-set-list.o duck.o
|
util-set-list.o duck.o util-string.o
|
||||||
|
|
||||||
.PHONY: imports
|
.PHONY: imports
|
||||||
imports: $(HACKERBASE-DEPS)
|
imports: $(HACKERBASE-DEPS)
|
||||||
|
@ -289,7 +289,7 @@ util-io.o: util-io.import.scm
|
||||||
util-io.import.scm: $(UTIL-IO-SOURCES)
|
util-io.import.scm: $(UTIL-IO-SOURCES)
|
||||||
|
|
||||||
UTIL-STRING-SOURCES=util-string.scm testing.import.scm \
|
UTIL-STRING-SOURCES=util-string.scm testing.import.scm \
|
||||||
util-utf8.import.scm
|
util-utf8.import.scm duck.import.scm
|
||||||
|
|
||||||
util-string.o: util-string.import.scm
|
util-string.o: util-string.import.scm
|
||||||
util-string.import.scm: $(UTIL-STRING-SOURCES)
|
util-string.import.scm: $(UTIL-STRING-SOURCES)
|
||||||
|
|
|
@ -30,4 +30,5 @@
|
||||||
util-proc
|
util-proc
|
||||||
util-format
|
util-format
|
||||||
util-tag
|
util-tag
|
||||||
|
util-string
|
||||||
)
|
)
|
||||||
|
|
|
@ -25,8 +25,11 @@
|
||||||
|
|
||||||
(declare (unit util-string))
|
(declare (unit util-string))
|
||||||
|
|
||||||
(module
|
(import duck)
|
||||||
|
|
||||||
|
(module*
|
||||||
util-string
|
util-string
|
||||||
|
#:doc ("String manipulation functions which are used throughout other modules.")
|
||||||
(
|
(
|
||||||
string-first+rest
|
string-first+rest
|
||||||
|
|
||||||
|
@ -45,7 +48,12 @@
|
||||||
util-utf8)
|
util-utf8)
|
||||||
|
|
||||||
;; Extracts the first token and the rest as separate strings
|
;; Extracts the first token and the rest as separate strings
|
||||||
(define (string-first+rest str)
|
(define/doc (string-first+rest str)
|
||||||
|
("* ```str``` - a string to split
|
||||||
|
|
||||||
|
Returns a pair of strings where the ```car``` of the pair is the first
|
||||||
|
token in the ```str``` given and ```cdr``` is a string with the
|
||||||
|
remainder with leading whitespace removed.")
|
||||||
(let ((dm (irregex-search (irregex "[ \\t]" 'u) str)))
|
(let ((dm (irregex-search (irregex "[ \\t]" 'u) str)))
|
||||||
(if dm
|
(if dm
|
||||||
(let* ((sep-idx (irregex-match-start-index dm))
|
(let* ((sep-idx (irregex-match-start-index dm))
|
||||||
|
@ -56,7 +64,11 @@
|
||||||
(cons str ""))))
|
(cons str ""))))
|
||||||
|
|
||||||
;; Encodes given UTF-8 string as quoted-printable
|
;; Encodes given UTF-8 string as quoted-printable
|
||||||
(define (string->qp str)
|
(define/doc (string->qp str)
|
||||||
|
("* ```str``` - arbitrary string
|
||||||
|
|
||||||
|
Returns a new string with all non-ASCII characters encoded as
|
||||||
|
quoted-printable sequences.")
|
||||||
(let loop ((lst (utf8-string->list str))
|
(let loop ((lst (utf8-string->list str))
|
||||||
(res '()))
|
(res '()))
|
||||||
(if (null? lst)
|
(if (null? lst)
|
||||||
|
@ -82,7 +94,11 @@
|
||||||
res)))))
|
res)))))
|
||||||
|
|
||||||
;; Returns upper-case version of the string
|
;; Returns upper-case version of the string
|
||||||
(define (string-upcase str)
|
(define/doc (string-upcase str)
|
||||||
|
("* ```str``` - arbitrary string
|
||||||
|
|
||||||
|
Returns the ```str``` with all characters converted to upper case
|
||||||
|
using ```char-upcase```. Does not work with UTF-8.")
|
||||||
(list->string
|
(list->string
|
||||||
(map char-upcase
|
(map char-upcase
|
||||||
(string->list str))))
|
(string->list str))))
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue