485 lines
12 KiB
Scheme
485 lines
12 KiB
Scheme
;;
|
|
;; sgr-list.scm
|
|
;;
|
|
;; Intermediate representation of strings with SGR state changes.
|
|
;;
|
|
;; ISC License
|
|
;;
|
|
;; Copyright 2023 Dominik Pantůček <dominik.pantucek@trustica.cz>
|
|
;;
|
|
;; Permission to use, copy, modify, and/or distribute this software
|
|
;; for any purpose with or without fee is hereby granted, provided
|
|
;; that the above copyright notice and this permission notice appear
|
|
;; in all copies.
|
|
;;
|
|
;; THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
|
|
;; WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
|
|
;; WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
|
|
;; AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
|
|
;; CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
|
;; OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
|
|
;; NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
|
|
;; CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|
;;
|
|
|
|
(declare (unit sgr-list))
|
|
|
|
(module
|
|
sgr-list
|
|
(
|
|
string->sgr-list
|
|
string->sgr-list/words
|
|
|
|
sgr-list->string
|
|
|
|
sgr-list-length
|
|
sgr-list-length/stretch
|
|
sgr-list-length-w/o-glues
|
|
|
|
sgr-token-spaces?
|
|
sgr-token-glue?
|
|
sgr-token-newline?
|
|
sgr-token-unglue?
|
|
|
|
sgr-token-neutralize
|
|
sgr-list-neutralize
|
|
|
|
sgr-list-num-glues
|
|
|
|
sgr-list-min-width
|
|
|
|
sgr-list-last-state
|
|
|
|
sgr-list-tests!
|
|
)
|
|
|
|
(import scheme
|
|
(chicken base)
|
|
(chicken string)
|
|
racket-kwargs
|
|
sgr-state
|
|
testing
|
|
util-utf8)
|
|
|
|
;; Converts given string into a list of string parts and sgr-states
|
|
(define* (string->sgr-list str
|
|
(initial-state empty-sgr-state))
|
|
(let ((src-len (string-length str)))
|
|
(let loop ((src-idx 0)
|
|
(token-start 0)
|
|
(token-end 0)
|
|
(token-pos 0)
|
|
(state initial-state)
|
|
(res '()))
|
|
(if (>= src-idx src-len)
|
|
(reverse
|
|
(if (= token-start token-end)
|
|
res
|
|
(cons (cons (substring str token-start token-end)
|
|
token-pos)
|
|
res)))
|
|
(let ((ch (string-ref str src-idx)))
|
|
(cond
|
|
((eq? ch #\x1b)
|
|
(let-values (((sgr-parsed next-idx)
|
|
(parse-csi-sgr-sequence+pos str src-idx state)))
|
|
(let ((token (update-sgr-state-from-list state sgr-parsed)))
|
|
(loop next-idx
|
|
next-idx
|
|
next-idx
|
|
0
|
|
token
|
|
(cons token
|
|
(if (= token-start token-end)
|
|
res
|
|
(cons (cons (substring str token-start token-end)
|
|
token-pos)
|
|
res)))))))
|
|
(else
|
|
(let ((next-idx (utf8-string-next-char str src-idx)))
|
|
(loop next-idx
|
|
token-start
|
|
next-idx
|
|
(add1 token-pos)
|
|
state
|
|
res)))))))))
|
|
|
|
;; Converts given string into a list of string parts and sgr-states,
|
|
;; handling continuous whitespace as separate tokens.
|
|
(define* (string->sgr-list/words str
|
|
(initial-state empty-sgr-state))
|
|
(let ((src-len (string-length str)))
|
|
(let loop ((src-idx 0)
|
|
(token-start 0)
|
|
(token-end 0)
|
|
(token-pos 0)
|
|
(state initial-state)
|
|
(space? #f)
|
|
(res '()))
|
|
(if (>= src-idx src-len)
|
|
(reverse
|
|
(if (= token-start token-end)
|
|
res
|
|
(cons (cons (substring str token-start token-end)
|
|
token-pos)
|
|
res)))
|
|
(let ((ch (string-ref str src-idx)))
|
|
(cond
|
|
((eq? ch #\x1b)
|
|
(let-values (((sgr-parsed next-idx)
|
|
(parse-csi-sgr-sequence+pos str src-idx state)))
|
|
(let ((token (update-sgr-state-from-list state sgr-parsed)))
|
|
(loop next-idx
|
|
next-idx
|
|
next-idx
|
|
0
|
|
token
|
|
#f
|
|
(cons token
|
|
(if (= token-start token-end)
|
|
res
|
|
(cons (cons (substring str token-start token-end)
|
|
token-pos)
|
|
res)))))))
|
|
((eq? ch #\backspace)
|
|
(let ((next-idx (add1 src-idx)))
|
|
(loop next-idx
|
|
next-idx
|
|
next-idx
|
|
0
|
|
state
|
|
#f
|
|
(cons (cons "\b" 1)
|
|
(if (= token-start token-end)
|
|
res
|
|
(cons (cons (substring str token-start token-end)
|
|
token-pos)
|
|
res))))))
|
|
((eq? ch #\newline)
|
|
(let ((next-idx (add1 src-idx)))
|
|
(loop next-idx
|
|
next-idx
|
|
next-idx
|
|
0
|
|
state
|
|
#f
|
|
(cons (cons "\n" 1)
|
|
(if (= token-start token-end)
|
|
res
|
|
(cons (cons (substring str token-start token-end)
|
|
token-pos)
|
|
res))))))
|
|
((memq ch '(#\space #\tab))
|
|
(let ((next-idx (add1 src-idx)))
|
|
(loop next-idx
|
|
(if space?
|
|
token-start
|
|
src-idx)
|
|
next-idx
|
|
(if space?
|
|
(add1 token-pos)
|
|
1)
|
|
state
|
|
#t
|
|
(if (and (not space?)
|
|
(> token-end token-start))
|
|
(cons (cons (substring str token-start token-end)
|
|
token-pos)
|
|
res)
|
|
res))))
|
|
(else
|
|
(let ((next-idx (utf8-string-next-char str src-idx)))
|
|
(loop next-idx
|
|
(if space?
|
|
src-idx
|
|
token-start)
|
|
next-idx
|
|
(if space?
|
|
1
|
|
(add1 token-pos))
|
|
state
|
|
#f
|
|
(if space?
|
|
(cons (cons (substring str token-start token-end)
|
|
token-pos)
|
|
res)
|
|
res))))))))))
|
|
|
|
;; Converts a SGR list into a single string
|
|
(define* (sgr-list->string lst
|
|
(initial-state empty-sgr-state)
|
|
#:reset-state (reset-state #f))
|
|
(let loop ((lst lst)
|
|
(res '())
|
|
(state initial-state))
|
|
(if (null? lst)
|
|
(string-intersperse
|
|
(reverse (if reset-state
|
|
(cons reset-state res)
|
|
res))
|
|
"")
|
|
(let ((token (car lst)))
|
|
(if (sgr-state? token)
|
|
(loop (cdr lst)
|
|
(cons (sgr-state-change->string state token) res)
|
|
token)
|
|
(loop (cdr lst)
|
|
(cons (car token) res)
|
|
state))))))
|
|
|
|
;; Predicate for SGR list tokens
|
|
(define (sgr-list-token? v)
|
|
(and (pair? v)
|
|
(string? (car v))
|
|
(fixnum? (cdr v))))
|
|
|
|
;; Returns the length of all utf8 strings in the sgr-list
|
|
(define (sgr-list-length sl)
|
|
(foldl
|
|
(lambda (acc tk)
|
|
(if (sgr-list-token? tk)
|
|
(+ acc (cdr tk))
|
|
acc))
|
|
0 sl))
|
|
|
|
;; Returns the length of all utf8 strings without glues. Glues must
|
|
;; have at least width 1 if they are not the first or last
|
|
;; token. Those can have zero length. All spaces have width of
|
|
;; 1. Used for wrapping blocks.
|
|
(define (sgr-list-length/stretch sl)
|
|
(let loop ((sl sl)
|
|
(len 0)
|
|
(seen-string? #f)
|
|
(last-space? #f))
|
|
(if (null? sl)
|
|
(if last-space?
|
|
(sub1 len)
|
|
len)
|
|
(let* ((tk (car sl))
|
|
(is-spaces? (sgr-token-spaces? tk))
|
|
(is-string? (and (not (sgr-state? tk))
|
|
(not is-spaces?))))
|
|
(loop (cdr sl)
|
|
(cond ((sgr-token-spaces? tk)
|
|
(if seen-string?
|
|
(add1 len)
|
|
len))
|
|
((sgr-state? tk)
|
|
len)
|
|
(else
|
|
(+ len (cdr tk))))
|
|
(or seen-string?
|
|
is-string?)
|
|
(if is-string?
|
|
#f
|
|
(if is-spaces?
|
|
#t
|
|
last-space?)))))))
|
|
|
|
;; Returns length without glues
|
|
(define (sgr-list-length-w/o-glues sl)
|
|
(foldl (lambda (acc tk)
|
|
(if (or (sgr-state? tk)
|
|
(sgr-token-glue? tk)
|
|
(sgr-token-unglue? tk))
|
|
acc
|
|
(+ acc (cdr tk))))
|
|
0 sl))
|
|
|
|
|
|
;; Predicate for string token containing only spaces and tabs
|
|
(define (sgr-token-spaces? t)
|
|
(and (pair? t)
|
|
(string? (car t))
|
|
(let loop ((i 0))
|
|
(if (= i (cdr t))
|
|
#t
|
|
(if (memq (string-ref (car t) i) '(#\space #\tab))
|
|
(loop (add1 i))
|
|
#f)))))
|
|
|
|
;; Predicate for string token with only spaces and at least one tab
|
|
(define (sgr-token-glue? t)
|
|
(and (pair? t)
|
|
(string? (car t))
|
|
(let loop ((i 0)
|
|
(glue #f))
|
|
(if (= i (cdr t))
|
|
glue
|
|
(let ((ch (string-ref (car t) i)))
|
|
(if (eq? ch #\tab)
|
|
(loop (add1 i) #t)
|
|
(if (eq? ch #\space)
|
|
(loop (add1 i) glue)
|
|
#f)))))))
|
|
|
|
;; Predicate for string containing only a newline
|
|
(define (sgr-token-newline? t)
|
|
(and (pair? t)
|
|
(string? (car t))
|
|
(eq? (cdr t) 1)
|
|
(eq? (string-ref (car t) 0) #\newline)))
|
|
|
|
;; Predicate for unglue (used for justify)
|
|
(define (sgr-token-unglue? t)
|
|
(and (pair? t)
|
|
(string? (car t))
|
|
(eq? (cdr t) 1)
|
|
(eq? (string-ref (car t) 0) #\backspace)))
|
|
|
|
;; Replaces all occurences of #\tab with #\space and removes trailing
|
|
;; #\backspace
|
|
(define (sgr-token-neutralize t)
|
|
(if (and (pair? t)
|
|
(string? (car t)))
|
|
(let* ((str (string-copy (car t)))
|
|
(len (string-length str)))
|
|
(if (> len 0)
|
|
(let loop ((idx 0))
|
|
(if (= idx len)
|
|
(if (eq? (string-ref str (sub1 len)) #\backspace)
|
|
(cons (substring str 0 (sub1 len))
|
|
(sub1 len))
|
|
(cons str len))
|
|
(let ((ch (string-ref str idx)))
|
|
(when (eq? ch #\tab)
|
|
(string-set! str idx #\space))
|
|
(loop (add1 idx)))))
|
|
t))))
|
|
|
|
;; Neutralizes whole SGR list
|
|
(define (sgr-list-neutralize sl)
|
|
(map sgr-token-neutralize sl))
|
|
|
|
;; Returns the number of glues in given SGR list
|
|
(define (sgr-list-num-glues sl)
|
|
(foldl (lambda (acc t)
|
|
(if (sgr-token-glue? t)
|
|
(add1 acc)
|
|
acc))
|
|
0 sl))
|
|
|
|
;; Returns the longest word
|
|
(define (sgr-list-min-width sl)
|
|
(let loop ((sl sl)
|
|
(res 0))
|
|
(if (null? sl)
|
|
res
|
|
(let ((tk (car sl)))
|
|
(loop (cdr sl)
|
|
(if (sgr-state? tk)
|
|
res
|
|
(max res (cdr tk))))))))
|
|
|
|
;; Returns the last SGR state in given sgr-list
|
|
(define* (sgr-list-last-state sl (initial-state empty-sgr-state))
|
|
(let loop ((sl sl)
|
|
(state initial-state))
|
|
(if (null? sl)
|
|
state
|
|
(loop (cdr sl)
|
|
(if (sgr-state? (car sl))
|
|
(car sl)
|
|
state)))))
|
|
|
|
;; Module self-tests
|
|
(define (sgr-list-tests!)
|
|
(run-tests
|
|
sgr-list
|
|
(test-equal? sgr-list->string
|
|
(sgr-list->string '(("Hello" . 5)))
|
|
"Hello")
|
|
(test-true sgr-list-token?
|
|
(sgr-list-token? '("Hello" . 5)))
|
|
(test-false sgr-list-token?
|
|
(sgr-list-token? empty-sgr-state))
|
|
(test-false sgr-list-token?
|
|
(sgr-list-token? (set-sgr-state-foreground
|
|
empty-sgr-state
|
|
(make-sgr-truecolor 1 2 3))))
|
|
(test-equal? sgr-list-length
|
|
(sgr-list-length '(("Hello" . 5)))
|
|
5)
|
|
(test-true sgr-token-spaces?
|
|
(sgr-token-spaces? '(" " . 5)))
|
|
(test-true sgr-token-spaces?
|
|
(sgr-token-spaces? '(" \x09 " . 5)))
|
|
(test-false sgr-token-spaces?
|
|
(sgr-token-spaces? '(" x " . 5)))
|
|
(test-true sgr-token-newline?
|
|
(sgr-token-newline? '("\n" . 1)))
|
|
(test-false sgr-token-newline?
|
|
(sgr-token-newline? '("\na" . 2)))
|
|
(test-false sgr-token-newline?
|
|
(sgr-token-newline? '("x" . 1)))
|
|
(test-true sgr-token-glue?
|
|
(sgr-token-glue? '(" \t " . 5)))
|
|
(test-true sgr-token-glue?
|
|
(sgr-token-glue? '("\t" . 1)))
|
|
(test-false sgr-token-glue?
|
|
(sgr-token-glue? '(" " . 5)))
|
|
(test-false sgr-token-glue?
|
|
(sgr-token-glue? '(" x " . 5)))
|
|
(test-equal? string->sgr-list
|
|
(string->sgr-list "Hello")
|
|
'(("Hello" . 5)))
|
|
(test-equal? string->sgr-list
|
|
(string->sgr-list "\x1b[1mHello \x1b[0mWorld!")
|
|
'(1 ("Hello " . 6) 0 ("World!" . 6)))
|
|
(test-equal? string->sgr-list/words
|
|
(string->sgr-list/words "Hello World!")
|
|
'(("Hello" . 5)
|
|
(" " . 1)
|
|
("World!" . 6)))
|
|
(test-equal? string->sgr-list/words
|
|
(string->sgr-list/words "\x1b[1mHello \x1b[0mWorld!")
|
|
'(1
|
|
("Hello" . 5)
|
|
(" " . 1)
|
|
0
|
|
("World!" . 6)))
|
|
(test-equal? string->sgr-list/words
|
|
(string->sgr-list/words "Hello World!\b")
|
|
'(("Hello" . 5)
|
|
(" " . 1)
|
|
("World!" . 6)
|
|
("\b" . 1)))
|
|
(test-equal? string->sgr-list/words
|
|
(string->sgr-list/words "Hello\nWorld!")
|
|
'(("Hello" . 5)
|
|
("\n" . 1)
|
|
("World!" . 6)))
|
|
(test-equal? sgr-list-length/stretch
|
|
(sgr-list-length/stretch
|
|
'(("Hello" . 5)))
|
|
5)
|
|
(test-equal? sgr-list-length/stretch
|
|
(sgr-list-length/stretch
|
|
'(("Hello" . 5) (" " . 4)))
|
|
5)
|
|
(test-equal? sgr-list-length/stretch
|
|
(sgr-list-length/stretch
|
|
'(("Hello" . 5) (" " . 4) 1))
|
|
5)
|
|
(test-equal? sgr-list-length/stretch
|
|
(sgr-list-length/stretch
|
|
'(1 (" " . 4) ("Hello" . 5) (" " . 4) 1))
|
|
5)
|
|
(test-equal? sgr-list-length/stretch
|
|
(sgr-list-length/stretch
|
|
'(("Hello" . 5) (" " . 4) ("World" . 5)))
|
|
11)
|
|
(test-equal? sgr-list-length-w/o-glues
|
|
(sgr-list-length-w/o-glues
|
|
'(("Hello" . 5) (" \t " . 5) ("World" . 5)))
|
|
10)
|
|
(test-true sgr-token-unglue?
|
|
(sgr-token-unglue? '("\b" . 1)))
|
|
(test-false sgr-token-unglue?
|
|
(sgr-token-unglue? '(" \b" . 2)))
|
|
(test-equal? sgr-token-neutralize
|
|
(sgr-token-neutralize '("Hello\b" . 6))
|
|
'("Hello" . 5))
|
|
))
|
|
|
|
)
|