hackerbase/src/sgr-list.scm

485 lines
12 KiB
Scheme

;;
;; sgr-list.scm
;;
;; Intermediate representation of strings with SGR state changes.
;;
;; ISC License
;;
;; Copyright 2023 Dominik Pantůček <dominik.pantucek@trustica.cz>
;;
;; Permission to use, copy, modify, and/or distribute this software
;; for any purpose with or without fee is hereby granted, provided
;; that the above copyright notice and this permission notice appear
;; in all copies.
;;
;; THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
;; WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
;; WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
;; AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
;; CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
;; OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
;; NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
;; CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
;;
(declare (unit sgr-list))
(module
sgr-list
(
string->sgr-list
string->sgr-list/words
sgr-list->string
sgr-list-length
sgr-list-length/stretch
sgr-list-length-w/o-glues
sgr-token-spaces?
sgr-token-glue?
sgr-token-newline?
sgr-token-unglue?
sgr-token-neutralize
sgr-list-neutralize
sgr-list-num-glues
sgr-list-min-width
sgr-list-last-state
sgr-list-tests!
)
(import scheme
(chicken base)
(chicken string)
racket-kwargs
sgr-state
testing
util-utf8)
;; Converts given string into a list of string parts and sgr-states
(define* (string->sgr-list str
(initial-state empty-sgr-state))
(let ((src-len (string-length str)))
(let loop ((src-idx 0)
(token-start 0)
(token-end 0)
(token-pos 0)
(state initial-state)
(res '()))
(if (>= src-idx src-len)
(reverse
(if (= token-start token-end)
res
(cons (cons (substring str token-start token-end)
token-pos)
res)))
(let ((ch (string-ref str src-idx)))
(cond
((eq? ch #\x1b)
(let-values (((sgr-parsed next-idx)
(parse-csi-sgr-sequence+pos str src-idx state)))
(let ((token (update-sgr-state-from-list state sgr-parsed)))
(loop next-idx
next-idx
next-idx
0
token
(cons token
(if (= token-start token-end)
res
(cons (cons (substring str token-start token-end)
token-pos)
res)))))))
(else
(let ((next-idx (utf8-string-next-char str src-idx)))
(loop next-idx
token-start
next-idx
(add1 token-pos)
state
res)))))))))
;; Converts given string into a list of string parts and sgr-states,
;; handling continuous whitespace as separate tokens.
(define* (string->sgr-list/words str
(initial-state empty-sgr-state))
(let ((src-len (string-length str)))
(let loop ((src-idx 0)
(token-start 0)
(token-end 0)
(token-pos 0)
(state initial-state)
(space? #f)
(res '()))
(if (>= src-idx src-len)
(reverse
(if (= token-start token-end)
res
(cons (cons (substring str token-start token-end)
token-pos)
res)))
(let ((ch (string-ref str src-idx)))
(cond
((eq? ch #\x1b)
(let-values (((sgr-parsed next-idx)
(parse-csi-sgr-sequence+pos str src-idx state)))
(let ((token (update-sgr-state-from-list state sgr-parsed)))
(loop next-idx
next-idx
next-idx
0
token
#f
(cons token
(if (= token-start token-end)
res
(cons (cons (substring str token-start token-end)
token-pos)
res)))))))
((eq? ch #\backspace)
(let ((next-idx (add1 src-idx)))
(loop next-idx
next-idx
next-idx
0
state
#f
(cons (cons "\b" 1)
(if (= token-start token-end)
res
(cons (cons (substring str token-start token-end)
token-pos)
res))))))
((eq? ch #\newline)
(let ((next-idx (add1 src-idx)))
(loop next-idx
next-idx
next-idx
0
state
#f
(cons (cons "\n" 1)
(if (= token-start token-end)
res
(cons (cons (substring str token-start token-end)
token-pos)
res))))))
((memq ch '(#\space #\tab))
(let ((next-idx (add1 src-idx)))
(loop next-idx
(if space?
token-start
src-idx)
next-idx
(if space?
(add1 token-pos)
1)
state
#t
(if (and (not space?)
(> token-end token-start))
(cons (cons (substring str token-start token-end)
token-pos)
res)
res))))
(else
(let ((next-idx (utf8-string-next-char str src-idx)))
(loop next-idx
(if space?
src-idx
token-start)
next-idx
(if space?
1
(add1 token-pos))
state
#f
(if space?
(cons (cons (substring str token-start token-end)
token-pos)
res)
res))))))))))
;; Converts a SGR list into a single string
(define* (sgr-list->string lst
(initial-state empty-sgr-state)
#:reset-state (reset-state #f))
(let loop ((lst lst)
(res '())
(state initial-state))
(if (null? lst)
(string-intersperse
(reverse (if reset-state
(cons reset-state res)
res))
"")
(let ((token (car lst)))
(if (sgr-state? token)
(loop (cdr lst)
(cons (sgr-state-change->string state token) res)
token)
(loop (cdr lst)
(cons (car token) res)
state))))))
;; Predicate for SGR list tokens
(define (sgr-list-token? v)
(and (pair? v)
(string? (car v))
(fixnum? (cdr v))))
;; Returns the length of all utf8 strings in the sgr-list
(define (sgr-list-length sl)
(foldl
(lambda (acc tk)
(if (sgr-list-token? tk)
(+ acc (cdr tk))
acc))
0 sl))
;; Returns the length of all utf8 strings without glues. Glues must
;; have at least width 1 if they are not the first or last
;; token. Those can have zero length. All spaces have width of
;; 1. Used for wrapping blocks.
(define (sgr-list-length/stretch sl)
(let loop ((sl sl)
(len 0)
(seen-string? #f)
(last-space? #f))
(if (null? sl)
(if last-space?
(sub1 len)
len)
(let* ((tk (car sl))
(is-spaces? (sgr-token-spaces? tk))
(is-string? (and (not (sgr-state? tk))
(not is-spaces?))))
(loop (cdr sl)
(cond ((sgr-token-spaces? tk)
(if seen-string?
(add1 len)
len))
((sgr-state? tk)
len)
(else
(+ len (cdr tk))))
(or seen-string?
is-string?)
(if is-string?
#f
(if is-spaces?
#t
last-space?)))))))
;; Returns length without glues
(define (sgr-list-length-w/o-glues sl)
(foldl (lambda (acc tk)
(if (or (sgr-state? tk)
(sgr-token-glue? tk)
(sgr-token-unglue? tk))
acc
(+ acc (cdr tk))))
0 sl))
;; Predicate for string token containing only spaces and tabs
(define (sgr-token-spaces? t)
(and (pair? t)
(string? (car t))
(let loop ((i 0))
(if (= i (cdr t))
#t
(if (memq (string-ref (car t) i) '(#\space #\tab))
(loop (add1 i))
#f)))))
;; Predicate for string token with only spaces and at least one tab
(define (sgr-token-glue? t)
(and (pair? t)
(string? (car t))
(let loop ((i 0)
(glue #f))
(if (= i (cdr t))
glue
(let ((ch (string-ref (car t) i)))
(if (eq? ch #\tab)
(loop (add1 i) #t)
(if (eq? ch #\space)
(loop (add1 i) glue)
#f)))))))
;; Predicate for string containing only a newline
(define (sgr-token-newline? t)
(and (pair? t)
(string? (car t))
(eq? (cdr t) 1)
(eq? (string-ref (car t) 0) #\newline)))
;; Predicate for unglue (used for justify)
(define (sgr-token-unglue? t)
(and (pair? t)
(string? (car t))
(eq? (cdr t) 1)
(eq? (string-ref (car t) 0) #\backspace)))
;; Replaces all occurences of #\tab with #\space and removes trailing
;; #\backspace
(define (sgr-token-neutralize t)
(if (and (pair? t)
(string? (car t)))
(let* ((str (string-copy (car t)))
(len (string-length str)))
(if (> len 0)
(let loop ((idx 0))
(if (= idx len)
(if (eq? (string-ref str (sub1 len)) #\backspace)
(cons (substring str 0 (sub1 len))
(sub1 len))
(cons str len))
(let ((ch (string-ref str idx)))
(when (eq? ch #\tab)
(string-set! str idx #\space))
(loop (add1 idx)))))
t))))
;; Neutralizes whole SGR list
(define (sgr-list-neutralize sl)
(map sgr-token-neutralize sl))
;; Returns the number of glues in given SGR list
(define (sgr-list-num-glues sl)
(foldl (lambda (acc t)
(if (sgr-token-glue? t)
(add1 acc)
acc))
0 sl))
;; Returns the longest word
(define (sgr-list-min-width sl)
(let loop ((sl sl)
(res 0))
(if (null? sl)
res
(let ((tk (car sl)))
(loop (cdr sl)
(if (sgr-state? tk)
res
(max res (cdr tk))))))))
;; Returns the last SGR state in given sgr-list
(define* (sgr-list-last-state sl (initial-state empty-sgr-state))
(let loop ((sl sl)
(state initial-state))
(if (null? sl)
state
(loop (cdr sl)
(if (sgr-state? (car sl))
(car sl)
state)))))
;; Module self-tests
(define (sgr-list-tests!)
(run-tests
sgr-list
(test-equal? sgr-list->string
(sgr-list->string '(("Hello" . 5)))
"Hello")
(test-true sgr-list-token?
(sgr-list-token? '("Hello" . 5)))
(test-false sgr-list-token?
(sgr-list-token? empty-sgr-state))
(test-false sgr-list-token?
(sgr-list-token? (set-sgr-state-foreground
empty-sgr-state
(make-sgr-truecolor 1 2 3))))
(test-equal? sgr-list-length
(sgr-list-length '(("Hello" . 5)))
5)
(test-true sgr-token-spaces?
(sgr-token-spaces? '(" " . 5)))
(test-true sgr-token-spaces?
(sgr-token-spaces? '(" \x09 " . 5)))
(test-false sgr-token-spaces?
(sgr-token-spaces? '(" x " . 5)))
(test-true sgr-token-newline?
(sgr-token-newline? '("\n" . 1)))
(test-false sgr-token-newline?
(sgr-token-newline? '("\na" . 2)))
(test-false sgr-token-newline?
(sgr-token-newline? '("x" . 1)))
(test-true sgr-token-glue?
(sgr-token-glue? '(" \t " . 5)))
(test-true sgr-token-glue?
(sgr-token-glue? '("\t" . 1)))
(test-false sgr-token-glue?
(sgr-token-glue? '(" " . 5)))
(test-false sgr-token-glue?
(sgr-token-glue? '(" x " . 5)))
(test-equal? string->sgr-list
(string->sgr-list "Hello")
'(("Hello" . 5)))
(test-equal? string->sgr-list
(string->sgr-list "\x1b[1mHello \x1b[0mWorld!")
'(1 ("Hello " . 6) 0 ("World!" . 6)))
(test-equal? string->sgr-list/words
(string->sgr-list/words "Hello World!")
'(("Hello" . 5)
(" " . 1)
("World!" . 6)))
(test-equal? string->sgr-list/words
(string->sgr-list/words "\x1b[1mHello \x1b[0mWorld!")
'(1
("Hello" . 5)
(" " . 1)
0
("World!" . 6)))
(test-equal? string->sgr-list/words
(string->sgr-list/words "Hello World!\b")
'(("Hello" . 5)
(" " . 1)
("World!" . 6)
("\b" . 1)))
(test-equal? string->sgr-list/words
(string->sgr-list/words "Hello\nWorld!")
'(("Hello" . 5)
("\n" . 1)
("World!" . 6)))
(test-equal? sgr-list-length/stretch
(sgr-list-length/stretch
'(("Hello" . 5)))
5)
(test-equal? sgr-list-length/stretch
(sgr-list-length/stretch
'(("Hello" . 5) (" " . 4)))
5)
(test-equal? sgr-list-length/stretch
(sgr-list-length/stretch
'(("Hello" . 5) (" " . 4) 1))
5)
(test-equal? sgr-list-length/stretch
(sgr-list-length/stretch
'(1 (" " . 4) ("Hello" . 5) (" " . 4) 1))
5)
(test-equal? sgr-list-length/stretch
(sgr-list-length/stretch
'(("Hello" . 5) (" " . 4) ("World" . 5)))
11)
(test-equal? sgr-list-length-w/o-glues
(sgr-list-length-w/o-glues
'(("Hello" . 5) (" \t " . 5) ("World" . 5)))
10)
(test-true sgr-token-unglue?
(sgr-token-unglue? '("\b" . 1)))
(test-false sgr-token-unglue?
(sgr-token-unglue? '(" \b" . 2)))
(test-equal? sgr-token-neutralize
(sgr-token-neutralize '("Hello\b" . 6))
'("Hello" . 5))
))
)