158 lines
4.8 KiB
Scheme
158 lines
4.8 KiB
Scheme
;;
|
|
;; util-parser.scm
|
|
;;
|
|
;; Common parts of the parsers.
|
|
;;
|
|
;; ISC License
|
|
;;
|
|
;; Copyright 2023 Brmlab, z.s.
|
|
;; Dominik Pantůček <dominik.pantucek@trustica.cz>
|
|
;;
|
|
;; Permission to use, copy, modify, and/or distribute this software
|
|
;; for any purpose with or without fee is hereby granted, provided
|
|
;; that the above copyright notice and this permission notice appear
|
|
;; in all copies.
|
|
;;
|
|
;; THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
|
|
;; WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
|
|
;; WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
|
|
;; AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
|
|
;; CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
|
;; OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
|
|
;; NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
|
|
;; CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|
;;
|
|
|
|
(declare (unit util-parser))
|
|
|
|
(import duck)
|
|
|
|
(module*
|
|
util-parser
|
|
#:doc ("This module contains common functions for both configuration and
|
|
member file parsers. All functions are UTF-8 aware.")
|
|
(
|
|
parser-preprocess-line
|
|
parser-parse-line
|
|
parser-tests!
|
|
)
|
|
|
|
(import scheme
|
|
(chicken base)
|
|
racket-kwargs
|
|
testing)
|
|
|
|
;; Pass 0: Removes any comments and removes any leading and trailing
|
|
;; whitespace.
|
|
(define*/doc (parser-preprocess-line line #:strip-comments? (strip-comments? #t))
|
|
("* ```line``` - a string with contents of one source line
|
|
|
|
If the input ```line``` contains the ```#``` character, the rest of
|
|
the line (including this character) is removed.
|
|
|
|
Any leading and trailing space is removed.
|
|
|
|
Returns a string representing the preprocessed line.")
|
|
(let* ((llen (string-length line))
|
|
(ppos (let ploop ((pidx 0))
|
|
(if (or (= pidx llen)
|
|
(not (let ((ch (string-ref line pidx)))
|
|
(or (eq? ch #\space)
|
|
(eq? ch #\tab)))))
|
|
pidx
|
|
(ploop (add1 pidx)))))
|
|
(hpos (let hloop ((hidx ppos))
|
|
(if (or (= hidx llen)
|
|
(and (or strip-comments?
|
|
(= hidx 0))
|
|
(eq? (string-ref line hidx) #\#)))
|
|
hidx
|
|
(hloop (add1 hidx)))))
|
|
(spos (let sloop ((sidx (sub1 hpos)))
|
|
(if (< sidx ppos)
|
|
ppos
|
|
(let ((ch (string-ref line sidx)))
|
|
(if (or (eq? ch #\space)
|
|
(eq? ch #\tab))
|
|
(sloop (sub1 sidx))
|
|
(add1 sidx)))))))
|
|
(substring line ppos spos)))
|
|
|
|
;; Pass 1: Expects line with comments and surrounding whitespace
|
|
;; removed, returns either #f if nothing was parsed, symbol if only
|
|
;; one token was there and pair of symbol and string if both key and
|
|
;; the value were present.
|
|
(define/doc (parser-parse-line line)
|
|
("* ```line``` - preprocessed line (string)
|
|
|
|
If the ```line``` is empty, returns ```#f```.
|
|
|
|
If the line contains only one token consisting of non-whitespace
|
|
characters before the first whitespace character (there is no
|
|
whitespace), returns a symbol created by interning the whole
|
|
```line```.
|
|
|
|
When the ```line``` contains whitespace character(s), it returns a
|
|
pair consisting of symbol created by interning the string of
|
|
non-whitespace characters before the first whitespace character and
|
|
the string with the rest of the line.")
|
|
(let* ((llen (string-length line))
|
|
(spos (let sloop ((sidx 0))
|
|
(if (or (= sidx llen)
|
|
(let ((ch (string-ref line sidx)))
|
|
(or (eq? ch #\space)
|
|
(eq? ch #\tab))))
|
|
sidx
|
|
(sloop (add1 sidx)))))
|
|
(vpos (let vloop ((vidx spos))
|
|
(if (or (= vidx llen)
|
|
(not (let ((ch (string-ref line vidx)))
|
|
(or (eq? ch #\space)
|
|
(eq? ch #\tab)))))
|
|
vidx
|
|
(vloop (add1 vidx))))))
|
|
(if (= llen 0)
|
|
#f
|
|
(if (< vpos llen)
|
|
(cons (string->symbol
|
|
(substring line 0 spos))
|
|
(substring line vpos))
|
|
(string->symbol
|
|
(substring line 0 spos))))))
|
|
|
|
;; Self-tests
|
|
(define (parser-tests!)
|
|
(run-tests
|
|
parser
|
|
(test-equal? parser-preprocess-line
|
|
(parser-preprocess-line "# all comment")
|
|
"")
|
|
(test-equal? parser-preprocess-line
|
|
(parser-preprocess-line " # all comment after spaces")
|
|
"")
|
|
(test-equal? parser-preprocess-line
|
|
(parser-preprocess-line " test # spaces and comment after spaces")
|
|
"test")
|
|
(test-equal? parser-preprocess-line
|
|
(parser-preprocess-line "key value # spaces and comment after spaces")
|
|
"key value")
|
|
(test-false parser-parse-line
|
|
(parser-parse-line ""))
|
|
(test-eq? parser-parse-line
|
|
(parser-parse-line "key")
|
|
'key)
|
|
(test-equal? parser-parse-line
|
|
(parser-parse-line "key value")
|
|
'(key . "value"))
|
|
(test-equal? parser-parse-line
|
|
(parser-parse-line "key value")
|
|
'(key . "value"))
|
|
(test-equal? parser-parse-line
|
|
(parser-parse-line "key value and some")
|
|
'(key . "value and some"))
|
|
(test-equal? parser-parse-line
|
|
(parser-parse-line "key value lot of spaces")
|
|
'(key . "value lot of spaces"))
|
|
))
|
|
|
|
)
|