hackerbase/src/util-string.scm

131 lines
3.5 KiB
Scheme

;;
;; util-string.scm
;;
;; Various string utilities.
;;
;; ISC License
;;
;; Copyright 2023 Brmlab, z.s.
;; Dominik Pantůček <dominik.pantucek@trustica.cz>
;;
;; Permission to use, copy, modify, and/or distribute this software
;; for any purpose with or without fee is hereby granted, provided
;; that the above copyright notice and this permission notice appear
;; in all copies.
;;
;; THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
;; WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
;; WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
;; AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
;; CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
;; OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
;; NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
;; CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
;;
(declare (unit util-string))
(import duck)
(module*
util-string
#:doc ("String manipulation functions which are used throughout other modules.")
(
string-first+rest
string->qp
string-upcase
string-tests!
)
(import scheme
(chicken base)
(chicken string)
(chicken irregex)
testing
util-utf8)
;; Extracts first token and the rest as separate string
(define/doc (string-first+rest str)
("* ```str``` - a string to split
Returns a pair of strings where the ```car``` of the pair is the first
token in the ```str``` given and ```cdr``` is a string with the
remainder with leading whitespace removed.")
(let ((dm (irregex-search (irregex "[ \\t]" 'u) str)))
(if dm
(let* ((sep-idx (irregex-match-start-index dm))
(key-str (substring str 0 sep-idx))
(sep+val (substring str sep-idx))
(val (irregex-replace (irregex "^[ \\t]*" 'u) sep+val "")))
(cons key-str val))
(cons str ""))))
;; Encodes given UTF-8 string as quoted-printable
(define/doc (string->qp str)
("* ```str``` - arbitrary string
Returns a new string with all non-ASCII characters encoded as
quoted-printable sequences.")
(let loop ((lst (utf8-string->list str))
(res '()))
(if (null? lst)
(string-intersperse (reverse res) "")
(loop (cdr lst)
(cons (let* ((chs (utf8-char->string (car lst)))
(ch1 (if (= (string-length chs) 1)
(string-ref chs 0)
(integer->char 31))))
(if (and (char>=? ch1 #\space)
(char<=? ch1 #\~))
chs
(string-intersperse
(map (lambda (ch)
(string-append "="
(substring
(number->string
(+ 256 (char->integer ch))
16)
1)))
(string->list chs))
"")))
res)))))
;; Returns upper-case version of the string
(define/doc (string-upcase str)
("* ```str``` - arbitrary string
Returns the ```str``` with all characters converted to upper case
using ```char-upcase```. Does not work with UTF-8.")
(list->string
(map char-upcase
(string->list str))))
;; Performs utils module self-tests.
(define (string-tests!)
(run-tests
util-string
(test-equal? string-first+rest
(string-first+rest "asdf rest")
'("asdf" . "rest"))
(test-equal? string-first+rest
(string-first+rest "asdf rest test rest")
'("asdf" . "rest test rest"))
(test-equal? string-first+rest
(string-first+rest "asdf")
'("asdf" . ""))
(test-equal? string->qp
(string->qp "asdf")
"asdf")
(test-equal? string->qp
(string->qp "asdfásdf")
"asdf=c3=a1sdf")
(test-equal? string-upcase
(string-upcase "asdFGH")
"ASDFGH")
))
)