146 lines
3.7 KiB
Scheme
146 lines
3.7 KiB
Scheme
;;
|
|
;; util-string.scm
|
|
;;
|
|
;; Various string utilities.
|
|
;;
|
|
;; ISC License
|
|
;;
|
|
;; Copyright 2023 Brmlab, z.s.
|
|
;; Dominik Pantůček <dominik.pantucek@trustica.cz>
|
|
;;
|
|
;; Permission to use, copy, modify, and/or distribute this software
|
|
;; for any purpose with or without fee is hereby granted, provided
|
|
;; that the above copyright notice and this permission notice appear
|
|
;; in all copies.
|
|
;;
|
|
;; THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
|
|
;; WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
|
|
;; WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
|
|
;; AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
|
|
;; CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
|
;; OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
|
|
;; NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
|
|
;; CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|
;;
|
|
|
|
(declare (unit util-string))
|
|
|
|
(module
|
|
util-string
|
|
(
|
|
string-repeat
|
|
|
|
string-first+rest
|
|
|
|
string-utf8?
|
|
string->list/utf8
|
|
|
|
string->qp
|
|
|
|
string-upcase
|
|
|
|
string-tests!
|
|
)
|
|
|
|
(import scheme
|
|
(chicken base)
|
|
(chicken string)
|
|
(chicken irregex)
|
|
testing)
|
|
|
|
;; Returns a new string made of `rep` consecutive copies of `str`.
;; A count of zero (or anything not greater than zero) yields "".
(define (string-repeat str rep)
  (do ((remaining rep (sub1 remaining))
       (pieces '() (cons str pieces)))
      ;; Stop once the counter is no longer positive and glue the
      ;; collected copies together without any separator.
      ((not (> remaining 0))
       (string-intersperse pieces ""))))
|
|
|
|
;; Splits `str` at its first space or tab and returns a pair
;; (first-token . rest). Any spaces/tabs at the start of the rest are
;; removed. When `str` contains no space or tab at all, the result is
;; (str . "").
(define (string-first+rest str)
  (cond
   ((irregex-search (irregex "[ \\t]" 'u) str)
    => (lambda (hit)
         (let ((cut (irregex-match-start-index hit)))
           (cons (substring str 0 cut)
                 ;; Drop the separator run in front of the remainder.
                 (irregex-replace (irregex "^[ \\t]*" 'u)
                                  (substring str cut)
                                  "")))))
   (else
    (cons str ""))))
|
|
|
|
;; Returns #t if the given string contains at least one non-ASCII
;; byte (i.e. part of a multi-byte UTF-8 sequence), #f otherwise.
;;
;; The string is scanned element by element for a value above 127.
;; Assumes string elements are bytes, as in CHICKEN 5.
;;
;; This replaces a comparison of the byte length against the number
;; of characters matched by the regex ".": since "." does not match a
;; newline, pure ASCII strings containing newlines were wrongly
;; reported as UTF-8.
(define (string-utf8? str)
  (let ((len (string-length str)))
    (let loop ((idx 0))
      (cond ((= idx len) #f)
            ((> (char->integer (string-ref str idx)) 127) #t)
            (else (loop (add1 idx)))))))
|
|
|
|
;; Converts given UTF-8 string into a list of strings, one per UTF-8
;; character, in order of appearance.
;;
;; The 's (single-line) option makes "." match newline characters as
;; well; without it newlines would be silently dropped from the
;; resulting list.
(define (string->list/utf8 str)
  (irregex-extract (irregex "." 'u 's) str))
|
|
|
|
;; Encodes given UTF-8 string as quoted-printable and returns the
;; encoded string.
;;
;; Printable ASCII (space through "~") is copied verbatim, except for
;; "=" which is itself escaped as "=3d" so the output can be decoded
;; unambiguously. Every other byte - control characters, newlines and
;; each byte of a multi-byte UTF-8 sequence - becomes "=" followed by
;; two hexadecimal digits.
;;
;; The encoding works byte by byte (assumes string elements are bytes,
;; as in CHICKEN 5); all bytes of a multi-byte UTF-8 character are
;; above 127 and therefore always escaped.
;;
;; NOTE(review): hex digits are lower-case to stay compatible with the
;; existing output format; RFC 2045 asks encoders for upper-case.
;; No soft line breaks are inserted for long lines.
(define (string->qp str)
  ;; "=" plus exactly two hex digits: adding 256 forces a three-digit
  ;; number whose leading "1" is then cut off, keeping leading zeros.
  (define (qp-escape ch)
    (string-append "="
                   (substring
                    (number->string (+ 256 (char->integer ch)) 16)
                    1)))
  (define (qp-literal? ch)
    (and (char>=? ch #\space)
         (char<=? ch #\~)
         (not (char=? ch #\=))))
  (string-intersperse
   (map (lambda (ch)
          (if (qp-literal? ch)
              (string ch)
              (qp-escape ch)))
        (string->list str))
   ""))
|
|
|
|
;; Returns a freshly allocated copy of `str` in which every element
;; has been passed through char-upcase.
(define (string-upcase str)
  (let* ((len (string-length str))
         (out (make-string len)))
    (do ((idx 0 (add1 idx)))
        ((= idx len) out)
      (string-set! out idx (char-upcase (string-ref str idx))))))
|
|
|
|
;; Runs the self-tests of the util-string module.
(define (string-tests!)
  (run-tests
   util-string

   ;; string-repeat: single-byte and multi-byte input
   (test-equal? string-repeat (string-repeat "-" 4) "----")
   (test-equal? string-repeat (string-repeat "š" 4) "šššš")

   ;; string-first+rest: one separator, several separators, none
   (test-equal? string-first+rest
                (string-first+rest "asdf rest")
                '("asdf" . "rest"))
   (test-equal? string-first+rest
                (string-first+rest "asdf rest test rest")
                '("asdf" . "rest test rest"))
   (test-equal? string-first+rest
                (string-first+rest "asdf")
                '("asdf" . ""))

   ;; string-utf8?: accented text versus plain ASCII
   (test-true string-utf8? (string-utf8? "ěščř"))
   (test-false string-utf8? (string-utf8? "Hello World!"))

   ;; string->qp: plain text passes through, accented text is escaped
   (test-equal? string->qp (string->qp "asdf") "asdf")
   (test-equal? string->qp (string->qp "asdfásdf") "asdf=c3=a1sdf")

   ;; string-upcase
   (test-equal? string-upcase (string-upcase "asdFGH") "ASDFGH")
   ))
|
|
|
|
)
|
|
|