;;
;; util-string.scm
;;
;; Various string utilities.
;;
;; ISC License
;;
;; Copyright 2023 Brmlab, z.s.
;; Dominik Pantůček
;;
;; Permission to use, copy, modify, and/or distribute this software
;; for any purpose with or without fee is hereby granted, provided
;; that the above copyright notice and this permission notice appear
;; in all copies.
;;
;; THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
;; WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
;; WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
;; AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
;; CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
;; OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
;; NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
;; CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
;;

(declare (unit util-string))

(module
 util-string
 (
  string-repeat
  string-first+rest
  string-utf8?
  string->list/utf8
  string->qp
  string-upcase
  string-tests!
  )

 (import scheme
         (chicken base)
         (chicken string)
         (chicken irregex)
         testing)

 ;; Pre-compiled patterns (module-private), built once at load time
 ;; instead of on every call.

 ;; First space or tab - the separator between first token and rest.
 (define first-sep-irx (irregex "[ \\t]" 'u))

 ;; Run of spaces/tabs at the very beginning of a string.
 (define leading-ws-irx (irregex "^[ \\t]*" 'u))

 ;; Exactly one UTF-8 encoded character. The 's (single-line) flag is
 ;; required so that "." also matches newline; without it newlines
 ;; would be silently dropped from the extracted list.
 (define utf8-char-irx (irregex "." 'u 's))

 ;; Returns a string consisting of str repeated rep times. A rep of
 ;; zero (or less) yields an empty string.
 (define (string-repeat str rep)
   (let loop ((rep rep)
              (res '()))
     (if (> rep 0)
         (loop (sub1 rep)
               (cons str res))
         (string-intersperse res ""))))

 ;; Splits str at the first space or tab. Returns a pair
 ;; (first . rest) where first is everything before the separator
 ;; and rest is the remainder with its leading spaces/tabs removed.
 ;; If there is no separator, returns (str . "").
 (define (string-first+rest str)
   (let ((dm (irregex-search first-sep-irx str)))
     (if dm
         (let* ((sep-idx (irregex-match-start-index dm))
                (key-str (substring str 0 sep-idx))
                (sep+val (substring str sep-idx))
                (val (irregex-replace leading-ws-irx sep+val "")))
           (cons key-str val))
         (cons str ""))))

 ;; Returns true if the number of UTF-8 characters in str differs
 ;; from (string-length str), i.e. if at least one character is
 ;; encoded using more than one string element.
 ;; NOTE(review): this presumes string-length counts bytes of the
 ;; UTF-8 encoding - confirm for the targeted CHICKEN version.
 (define (string-utf8? str)
   (let ((asciilen (string-length str))
         (utf8len (length (string->list/utf8 str))))
     (not (= asciilen utf8len))))

 ;; Converts given UTF-8 string into a list of strings, each holding
 ;; exactly one UTF-8 character (including newline characters).
 (define (string->list/utf8 str)
   (irregex-extract utf8-char-irx str))

 ;; Encodes given UTF-8 string as quoted-printable.
 ;;
 ;; Single-element characters in the range space..~ are passed
 ;; through literally, with the exception of "=" which is the
 ;; quoted-printable escape character and therefore must itself be
 ;; encoded. Everything else (control characters including newline,
 ;; DEL and all multi-byte characters) is emitted as one "=xx" group
 ;; per string element.
 ;;
 ;; NOTE(review): hex digits are emitted in lower case, which is what
 ;; the self-tests pin; RFC 2045 asks encoders for upper case -
 ;; confirm with consumers before changing.
 (define (string->qp str)
   ;; "=xx" for a single string element; adding 256 and dropping the
   ;; leading digit zero-pads the hexadecimal number to two digits.
   (define (encode-element ch)
     (string-append
      "="
      (substring (number->string (+ 256 (char->integer ch)) 16) 1)))
   ;; True if the one-character string chs may be emitted unchanged.
   (define (literal? chs)
     (and (= (string-length chs) 1)
          (let ((ch (string-ref chs 0)))
            (and (char>=? ch #\space)
                 (char<=? ch #\~)
                 (not (char=? ch #\=))))))
   (let loop ((lst (string->list/utf8 str))
              (res '()))
     (if (null? lst)
         (string-intersperse (reverse res) "")
         (let ((chs (car lst)))
           (loop (cdr lst)
                 (cons (if (literal? chs)
                           chs
                           (string-intersperse
                            (map encode-element (string->list chs))
                            ""))
                       res))))))

 ;; Returns upper-case version of the string. The conversion is done
 ;; element by element using char-upcase.
 (define (string-upcase str)
   (list->string
    (map char-upcase
         (string->list str))))

 ;; Performs utils module self-tests.
 (define (string-tests!)
   (run-tests
    util-string
    (test-equal? string-repeat (string-repeat "-" 4) "----")
    (test-equal? string-repeat (string-repeat "š" 4) "šššš")
    (test-equal? string-first+rest
                 (string-first+rest "asdf rest")
                 '("asdf" . "rest"))
    (test-equal? string-first+rest
                 (string-first+rest "asdf rest test rest")
                 '("asdf" . "rest test rest"))
    (test-equal? string-first+rest
                 (string-first+rest "asdf")
                 '("asdf" . ""))
    (test-true string-utf8? (string-utf8? "ěščř"))
    (test-false string-utf8? (string-utf8? "Hello World!"))
    (test-false string-utf8? (string-utf8? "Hello\nWorld!"))
    (test-equal? string->list/utf8
                 (string->list/utf8 "a\nb")
                 '("a" "\n" "b"))
    (test-equal? string->qp (string->qp "asdf") "asdf")
    (test-equal? string->qp (string->qp "asdfásdf") "asdf=c3=a1sdf")
    (test-equal? string->qp (string->qp "a=b") "a=3db")
    (test-equal? string->qp (string->qp "a\nb") "a=0ab")
    (test-equal? string-upcase (string-upcase "asdFGH") "ASDFGH")
    ))

 )