hackerbase/src/util-parser.scm

158 lines
4.8 KiB
Scheme

;;
;; util-parser.scm
;;
;; Common parts of the parsers.
;;
;; ISC License
;;
;; Copyright 2023 Brmlab, z.s.
;; Dominik Pantůček <dominik.pantucek@trustica.cz>
;;
;; Permission to use, copy, modify, and/or distribute this software
;; for any purpose with or without fee is hereby granted, provided
;; that the above copyright notice and this permission notice appear
;; in all copies.
;;
;; THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
;; WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
;; WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
;; AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
;; CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
;; OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
;; NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
;; CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
;;
(declare (unit util-parser))
(import duck)
(module*
util-parser
#:doc ("This module contains common functions for both configuration and
member file parsers. All functions are UTF-8 aware.")
(
parser-preprocess-line
parser-parse-line
parser-tests!
)
(import scheme
(chicken base)
racket-kwargs
testing)
;; Pass 0: Removes any comments and removes any leading and trailing
;; whitespace.
(define*/doc (parser-preprocess-line line #:strip-comments? (strip-comments? #t))
("* ```line``` - a string with contents of one source line
If the input ```line``` contains the ```#``` character, the rest of
the line (including this character) is removed.
Any leading and trailing space is removed.
Returns a string representing the preprocessed line.")
(let* ((llen (string-length line))
(ppos (let ploop ((pidx 0))
(if (or (= pidx llen)
(not (let ((ch (string-ref line pidx)))
(or (eq? ch #\space)
(eq? ch #\tab)))))
pidx
(ploop (add1 pidx)))))
(hpos (let hloop ((hidx ppos))
(if (or (= hidx llen)
(and (or strip-comments?
(= hidx 0))
(eq? (string-ref line hidx) #\#)))
hidx
(hloop (add1 hidx)))))
(spos (let sloop ((sidx (sub1 hpos)))
(if (< sidx ppos)
ppos
(let ((ch (string-ref line sidx)))
(if (or (eq? ch #\space)
(eq? ch #\tab))
(sloop (sub1 sidx))
(add1 sidx)))))))
(substring line ppos spos)))
;; Pass 1: Expects line with comments and surrounding whitespace
;; removed, returns either #f if nothing was parsed, symbol if only
;; one token was there and pair of symbol and string if both key and
;; the value were present.
(define/doc (parser-parse-line line)
("* ```line``` - preprocessed line (string)
If the ```line``` is empty, returns ```#f```.
If the line contains only one token consisting of non-whitespace
characters before the first whitespace character (there is no
whitespace), returns a symbol created by interning the whole
```line```.
When the ```line``` contains whitespace character(s), it returns a
pair consisting of symbol created by interning the string of
non-whitespace characters before the first whitespace character and
the string with the rest of the line.")
(let* ((llen (string-length line))
(spos (let sloop ((sidx 0))
(if (or (= sidx llen)
(let ((ch (string-ref line sidx)))
(or (eq? ch #\space)
(eq? ch #\tab))))
sidx
(sloop (add1 sidx)))))
(vpos (let vloop ((vidx spos))
(if (or (= vidx llen)
(not (let ((ch (string-ref line vidx)))
(or (eq? ch #\space)
(eq? ch #\tab)))))
vidx
(vloop (add1 vidx))))))
(if (= llen 0)
#f
(if (< vpos llen)
(cons (string->symbol
(substring line 0 spos))
(substring line vpos))
(string->symbol
(substring line 0 spos))))))
;; Self-tests
(define (parser-tests!)
(run-tests
parser
(test-equal? parser-preprocess-line
(parser-preprocess-line "# all comment")
"")
(test-equal? parser-preprocess-line
(parser-preprocess-line " # all comment after spaces")
"")
(test-equal? parser-preprocess-line
(parser-preprocess-line " test # spaces and comment after spaces")
"test")
(test-equal? parser-preprocess-line
(parser-preprocess-line "key value # spaces and comment after spaces")
"key value")
(test-false parser-parse-line
(parser-parse-line ""))
(test-eq? parser-parse-line
(parser-parse-line "key")
'key)
(test-equal? parser-parse-line
(parser-parse-line "key value")
'(key . "value"))
(test-equal? parser-parse-line
(parser-parse-line "key value")
'(key . "value"))
(test-equal? parser-parse-line
(parser-parse-line "key value and some")
'(key . "value and some"))
(test-equal? parser-parse-line
(parser-parse-line "key value lot of spaces")
'(key . "value lot of spaces"))
))
)