;; ;; sgr-list.scm ;; ;; Intermediate representation of strings with SGR state changes. ;; ;; ISC License ;; ;; Copyright 2023 Dominik Pantůček ;; ;; Permission to use, copy, modify, and/or distribute this software ;; for any purpose with or without fee is hereby granted, provided ;; that the above copyright notice and this permission notice appear ;; in all copies. ;; ;; THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL ;; WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED ;; WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE ;; AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR ;; CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS ;; OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, ;; NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN ;; CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ;; (declare (unit sgr-list)) (module sgr-list ( string->sgr-list string->sgr-list/words sgr-list->string sgr-list-length sgr-list-length/stretch sgr-list-length-w/o-glues sgr-token-spaces? sgr-token-glue? sgr-token-newline? sgr-token-unglue? sgr-token-neutralize sgr-list-neutralize sgr-list-num-glues sgr-list-min-width sgr-list-last-state sgr-list-tests! ) (import scheme (chicken base) (chicken string) racket-kwargs sgr-state testing util-utf8) ;; Converts given string into a list of string parts and sgr-states (define* (string->sgr-list str (initial-state empty-sgr-state)) (let ((src-len (string-length str))) (let loop ((src-idx 0) (token-start 0) (token-end 0) (token-pos 0) (state initial-state) (res '())) (if (>= src-idx src-len) (reverse (if (= token-start token-end) res (cons (cons (substring str token-start token-end) token-pos) res))) (let ((ch (string-ref str src-idx))) (cond ((eq? ch #\x1b) (let-values (((sgr-parsed next-idx) (parse-csi-sgr-sequence+pos str src-idx state))) (let ((token (update-sgr-state-from-list state sgr-parsed))) (loop next-idx next-idx next-idx 0 token (cons token (if (= token-start token-end) res (cons (cons (substring str token-start token-end) token-pos) res))))))) (else (let ((next-idx (utf8-string-next-char str src-idx))) (loop next-idx token-start next-idx (add1 token-pos) state res))))))))) ;; Converts given string into a list of string parts and sgr-states, ;; handling continuous whitespace as separate tokens. (define* (string->sgr-list/words str (initial-state empty-sgr-state)) (let ((src-len (string-length str))) (let loop ((src-idx 0) (token-start 0) (token-end 0) (token-pos 0) (state initial-state) (space? #f) (res '())) (if (>= src-idx src-len) (reverse (if (= token-start token-end) res (cons (cons (substring str token-start token-end) token-pos) res))) (let ((ch (string-ref str src-idx))) (cond ((eq? ch #\x1b) (let-values (((sgr-parsed next-idx) (parse-csi-sgr-sequence+pos str src-idx state))) (let ((token (update-sgr-state-from-list state sgr-parsed))) (loop next-idx next-idx next-idx 0 token #f (cons token (if (= token-start token-end) res (cons (cons (substring str token-start token-end) token-pos) res))))))) ((eq? ch #\backspace) (let ((next-idx (add1 src-idx))) (loop next-idx next-idx next-idx 0 state #f (cons (cons "\b" 1) (if (= token-start token-end) res (cons (cons (substring str token-start token-end) token-pos) res)))))) ((eq? ch #\newline) (let ((next-idx (add1 src-idx))) (loop next-idx next-idx next-idx 0 state #f (cons (cons "\n" 1) (if (= token-start token-end) res (cons (cons (substring str token-start token-end) token-pos) res)))))) ((memq ch '(#\space #\tab)) (let ((next-idx (add1 src-idx))) (loop next-idx (if space? token-start src-idx) next-idx (if space? (add1 token-pos) 1) state #t (if (and (not space?) (> token-end token-start)) (cons (cons (substring str token-start token-end) token-pos) res) res)))) (else (let ((next-idx (utf8-string-next-char str src-idx))) (loop next-idx (if space? src-idx token-start) next-idx (if space? 1 (add1 token-pos)) state #f (if space? (cons (cons (substring str token-start token-end) token-pos) res) res)))))))))) ;; Converts a SGR list into a single string (define* (sgr-list->string lst (initial-state empty-sgr-state) #:reset-state (reset-state #f)) (let loop ((lst lst) (res '()) (state initial-state)) (if (null? lst) (string-intersperse (reverse (if reset-state (cons reset-state res) res)) "") (let ((token (car lst))) (if (sgr-state? token) (loop (cdr lst) (cons (sgr-state-change->string state token) res) token) (loop (cdr lst) (cons (car token) res) state)))))) ;; Predicate for SGR list tokens (define (sgr-list-token? v) (and (pair? v) (string? (car v)) (fixnum? (cdr v)))) ;; Returns the length of all utf8 strings in the sgr-list (define (sgr-list-length sl) (foldl (lambda (acc tk) (if (sgr-list-token? tk) (+ acc (cdr tk)) acc)) 0 sl)) ;; Returns the length of all utf8 strings without glues. Glues must ;; have at least width 1 if they are not the first or last ;; token. Those can have zero length. All spaces have width of ;; 1. Used for wrapping blocks. (define (sgr-list-length/stretch sl) (let loop ((sl sl) (len 0) (seen-string? #f) (last-space? #f)) (if (null? sl) (if last-space? (sub1 len) len) (let* ((tk (car sl)) (is-spaces? (sgr-token-spaces? tk)) (is-string? (and (not (sgr-state? tk)) (not is-spaces?)))) (loop (cdr sl) (cond ((sgr-token-spaces? tk) (if seen-string? (add1 len) len)) ((sgr-state? tk) len) (else (+ len (cdr tk)))) (or seen-string? is-string?) (if is-string? #f (if is-spaces? #t last-space?))))))) ;; Returns length without glues (define (sgr-list-length-w/o-glues sl) (foldl (lambda (acc tk) (if (or (sgr-state? tk) (sgr-token-glue? tk) (sgr-token-unglue? tk)) acc (+ acc (cdr tk)))) 0 sl)) ;; Predicate for string token containing only spaces and tabs (define (sgr-token-spaces? t) (and (pair? t) (string? (car t)) (let loop ((i 0)) (if (= i (cdr t)) #t (if (memq (string-ref (car t) i) '(#\space #\tab)) (loop (add1 i)) #f))))) ;; Predicate for string token with only spaces and at least one tab (define (sgr-token-glue? t) (and (pair? t) (string? (car t)) (let loop ((i 0) (glue #f)) (if (= i (cdr t)) glue (let ((ch (string-ref (car t) i))) (if (eq? ch #\tab) (loop (add1 i) #t) (if (eq? ch #\space) (loop (add1 i) glue) #f))))))) ;; Predicate for string containing only a newline (define (sgr-token-newline? t) (and (pair? t) (string? (car t)) (eq? (cdr t) 1) (eq? (string-ref (car t) 0) #\newline))) ;; Predicate for unglue (used for justify) (define (sgr-token-unglue? t) (and (pair? t) (string? (car t)) (eq? (cdr t) 1) (eq? (string-ref (car t) 0) #\backspace))) ;; Replaces all occurences of #\tab with #\space and removes trailing ;; #\backspace (define (sgr-token-neutralize t) (if (and (pair? t) (string? (car t))) (let* ((str (string-copy (car t))) (len (string-length str))) (if (> len 0) (let loop ((idx 0)) (if (= idx len) (if (eq? (string-ref str (sub1 len)) #\backspace) (cons (substring str 0 (sub1 len)) (sub1 len)) (cons str len)) (let ((ch (string-ref str idx))) (when (eq? ch #\tab) (string-set! str idx #\space)) (loop (add1 idx))))) t)))) ;; Neutralizes whole SGR list (define (sgr-list-neutralize sl) (map sgr-token-neutralize sl)) ;; Returns the number of glues in given SGR list (define (sgr-list-num-glues sl) (foldl (lambda (acc t) (if (sgr-token-glue? t) (add1 acc) acc)) 0 sl)) ;; Returns the longest word (define (sgr-list-min-width sl) (let loop ((sl sl) (res 0)) (if (null? sl) res (let ((tk (car sl))) (loop (cdr sl) (if (sgr-state? tk) res (max res (cdr tk)))))))) ;; Returns the last SGR state in given sgr-list (define* (sgr-list-last-state sl (initial-state empty-sgr-state)) (let loop ((sl sl) (state initial-state)) (if (null? sl) state (loop (cdr sl) (if (sgr-state? (car sl)) (car sl) state))))) ;; Module self-tests (define (sgr-list-tests!) (run-tests sgr-list (test-equal? sgr-list->string (sgr-list->string '(("Hello" . 5))) "Hello") (test-true sgr-list-token? (sgr-list-token? '("Hello" . 5))) (test-false sgr-list-token? (sgr-list-token? empty-sgr-state)) (test-false sgr-list-token? (sgr-list-token? (set-sgr-state-foreground empty-sgr-state (make-sgr-truecolor 1 2 3)))) (test-equal? sgr-list-length (sgr-list-length '(("Hello" . 5))) 5) (test-true sgr-token-spaces? (sgr-token-spaces? '(" " . 5))) (test-true sgr-token-spaces? (sgr-token-spaces? '(" \x09 " . 5))) (test-false sgr-token-spaces? (sgr-token-spaces? '(" x " . 5))) (test-true sgr-token-newline? (sgr-token-newline? '("\n" . 1))) (test-false sgr-token-newline? (sgr-token-newline? '("\na" . 2))) (test-false sgr-token-newline? (sgr-token-newline? '("x" . 1))) (test-true sgr-token-glue? (sgr-token-glue? '(" \t " . 5))) (test-true sgr-token-glue? (sgr-token-glue? '("\t" . 1))) (test-false sgr-token-glue? (sgr-token-glue? '(" " . 5))) (test-false sgr-token-glue? (sgr-token-glue? '(" x " . 5))) (test-equal? string->sgr-list (string->sgr-list "Hello") '(("Hello" . 5))) (test-equal? string->sgr-list (string->sgr-list "\x1b[1mHello \x1b[0mWorld!") '(1 ("Hello " . 6) 0 ("World!" . 6))) (test-equal? string->sgr-list/words (string->sgr-list/words "Hello World!") '(("Hello" . 5) (" " . 1) ("World!" . 6))) (test-equal? string->sgr-list/words (string->sgr-list/words "\x1b[1mHello \x1b[0mWorld!") '(1 ("Hello" . 5) (" " . 1) 0 ("World!" . 6))) (test-equal? string->sgr-list/words (string->sgr-list/words "Hello World!\b") '(("Hello" . 5) (" " . 1) ("World!" . 6) ("\b" . 1))) (test-equal? string->sgr-list/words (string->sgr-list/words "Hello\nWorld!") '(("Hello" . 5) ("\n" . 1) ("World!" . 6))) (test-equal? sgr-list-length/stretch (sgr-list-length/stretch '(("Hello" . 5))) 5) (test-equal? sgr-list-length/stretch (sgr-list-length/stretch '(("Hello" . 5) (" " . 4))) 5) (test-equal? sgr-list-length/stretch (sgr-list-length/stretch '(("Hello" . 5) (" " . 4) 1)) 5) (test-equal? sgr-list-length/stretch (sgr-list-length/stretch '(1 (" " . 4) ("Hello" . 5) (" " . 4) 1)) 5) (test-equal? sgr-list-length/stretch (sgr-list-length/stretch '(("Hello" . 5) (" " . 4) ("World" . 5))) 11) (test-equal? sgr-list-length-w/o-glues (sgr-list-length-w/o-glues '(("Hello" . 5) (" \t " . 5) ("World" . 5))) 10) (test-true sgr-token-unglue? (sgr-token-unglue? '("\b" . 1))) (test-false sgr-token-unglue? (sgr-token-unglue? '(" \b" . 2))) (test-equal? sgr-token-neutralize (sgr-token-neutralize '("Hello\b" . 6)) '("Hello" . 5)) )) )