Duck util-string.
This commit is contained in:
parent
3833196533
commit
722ac4830c
5 changed files with 58 additions and 56 deletions
|
@ -371,3 +371,37 @@ A unifying module for compound data structures tagging.
|
|||
|
||||
Creates a unique and collision-free symbol to identify compound data
|
||||
structures based on lists and pairs.
|
||||
|
||||
## util-string [module]
|
||||
|
||||
(import util-string)
|
||||
|
||||
String manipulation functions which are used throughout other modules.
|
||||
|
||||
### string-first+rest [procedure]
|
||||
|
||||
(string-first+rest str)
|
||||
|
||||
* ```str``` - a string to split
|
||||
|
||||
Returns a pair of strings where the ```car``` of the pair is the first
|
||||
token in the ```str``` given and ```cdr``` is a string with the
|
||||
remainder with leading whitespace removed.
|
||||
|
||||
### string->qp [procedure]
|
||||
|
||||
(string->qp str)
|
||||
|
||||
* ```str``` - arbitrary string
|
||||
|
||||
Returns a new string with all non-ASCII characters encoded as
|
||||
quoted-printable sequences.
|
||||
|
||||
### string-upcase [procedure]
|
||||
|
||||
(string-upcase str)
|
||||
|
||||
* ```str``` - arbitrary string
|
||||
|
||||
Returns the ```str``` with all characters converted to upper case
|
||||
using ```char-upcase```. Does not work with UTF-8.
|
||||
|
|
49
doc/utils.md
49
doc/utils.md
|
@ -243,52 +243,3 @@ Returns a new lset instance from ```ls1``` with all elements in
|
|||
* ```ls2``` - lset instance
|
||||
|
||||
Returns true if the sets contain exactly the same values.
|
||||
|
||||
### String
|
||||
|
||||
(import util-string)
|
||||
|
||||
String manipulation functions which are used throughout other modules.
|
||||
|
||||
(string-repeat str rep)
|
||||
|
||||
* ```str``` - string to repeat
|
||||
* ```rep``` - number of repeats
|
||||
|
||||
Returns a string created by repeating the string ```str``` exactly
|
||||
```rep``` number of times.
|
||||
|
||||
(string-first+rest str)
|
||||
|
||||
* ```str``` - a string to split
|
||||
|
||||
Returns a pair of strings where the ```car``` of the pair is the first
|
||||
token in the ```str``` given and ```cdr``` is a string with the
|
||||
remainder with leading whitespace removed.
|
||||
|
||||
(string-utf8? str)
|
||||
|
||||
* ```str``` - arbitrary string
|
||||
|
||||
Returns ```#t``` if the given string ```str``` contains UTF-8 characters.
|
||||
|
||||
(string->list/utf8 str)
|
||||
|
||||
* ```str``` - arbitrary string
|
||||
|
||||
Returns a list of strings representing individual (possibly UTF-8)
|
||||
characters of the string.
|
||||
|
||||
(string->qp str)
|
||||
|
||||
* ```str``` - arbitrary string
|
||||
|
||||
Returns a new string with all non-ASCII characters encoded as
|
||||
quoted-printable sequences.
|
||||
|
||||
(string-upcase str)
|
||||
|
||||
* ```str``` - arbitrary string
|
||||
|
||||
Returns the ```str``` with all characters converted to upper case
|
||||
using ```char-upcase```. Does not work with UTF-8.
|
||||
|
|
|
@ -62,13 +62,13 @@ GENDOC-SOURCES=gendoc.scm duck-extract.import.scm \
|
|||
util-io.import.scm util-stdout.import.scm \
|
||||
util-parser.import.scm util-list.import.scm \
|
||||
util-proc.import.scm util-format.import.scm \
|
||||
util-tag.import.scm
|
||||
util-tag.import.scm util-string.import.scm
|
||||
|
||||
GENDOC-OBJS=gendoc.o duck-extract.o util-time.o util-csv.o util-io.o \
|
||||
progress.o testing.o util-proc.o util-git.o util-io.o \
|
||||
util-stdout.o util-parser.o util-list.o util-proc.o \
|
||||
util-format.o racket-kwargs.o util-dict-list.o util-tag.o \
|
||||
util-set-list.o duck.o
|
||||
util-set-list.o duck.o util-string.o
|
||||
|
||||
.PHONY: imports
|
||||
imports: $(HACKERBASE-DEPS)
|
||||
|
@ -289,7 +289,7 @@ util-io.o: util-io.import.scm
|
|||
util-io.import.scm: $(UTIL-IO-SOURCES)
|
||||
|
||||
UTIL-STRING-SOURCES=util-string.scm testing.import.scm \
|
||||
util-utf8.import.scm
|
||||
util-utf8.import.scm duck.import.scm
|
||||
|
||||
util-string.o: util-string.import.scm
|
||||
util-string.import.scm: $(UTIL-STRING-SOURCES)
|
||||
|
|
|
@ -30,4 +30,5 @@
|
|||
util-proc
|
||||
util-format
|
||||
util-tag
|
||||
util-string
|
||||
)
|
||||
|
|
|
@ -25,8 +25,11 @@
|
|||
|
||||
(declare (unit util-string))
|
||||
|
||||
(module
|
||||
(import duck)
|
||||
|
||||
(module*
|
||||
util-string
|
||||
#:doc ("String manipulation functions which are used throughout other modules.")
|
||||
(
|
||||
string-first+rest
|
||||
|
||||
|
@ -45,7 +48,12 @@
|
|||
util-utf8)
|
||||
|
||||
;; Extracts first token and the rest as separate string
|
||||
(define (string-first+rest str)
|
||||
(define/doc (string-first+rest str)
|
||||
("* ```str``` - a string to split
|
||||
|
||||
Returns a pair of strings where the ```car``` of the pair is the first
|
||||
token in the ```str``` given and ```cdr``` is a string with the
|
||||
remainder with leading whitespace removed.")
|
||||
(let ((dm (irregex-search (irregex "[ \\t]" 'u) str)))
|
||||
(if dm
|
||||
(let* ((sep-idx (irregex-match-start-index dm))
|
||||
|
@ -56,7 +64,11 @@
|
|||
(cons str ""))))
|
||||
|
||||
;; Encodes given UTF-8 string as quoted-printable
|
||||
(define (string->qp str)
|
||||
(define/doc (string->qp str)
|
||||
("* ```str``` - arbitrary string
|
||||
|
||||
Returns a new string with all non-ASCII characters encoded as
|
||||
quoted-printable sequences.")
|
||||
(let loop ((lst (utf8-string->list str))
|
||||
(res '()))
|
||||
(if (null? lst)
|
||||
|
@ -82,7 +94,11 @@
|
|||
res)))))
|
||||
|
||||
;; Returns upper-case version of the string
|
||||
(define (string-upcase str)
|
||||
(define/doc (string-upcase str)
|
||||
("* ```str``` - arbitrary string
|
||||
|
||||
Returns the ```str``` with all characters converted to upper case
|
||||
using ```char-upcase```. Does not work with UTF-8.")
|
||||
(list->string
|
||||
(map char-upcase
|
||||
(string->list str))))
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue