Start splitting out parser utils.

This commit is contained in:
Dominik Pantůček 2023-04-09 10:09:41 +02:00
parent 7a260595d2
commit 46ffd605ea
6 changed files with 83 additions and 29 deletions

66
src/util-parser.scm Normal file
View file

@ -0,0 +1,66 @@
;;
;; util-parser.scm
;;
;; Parser utilities: line preprocessing (comment and whitespace removal).
;;
;; ISC License
;;
;; Copyright 2023 Brmlab, z.s.
;; Dominik Pantůček <dominik.pantucek@trustica.cz>
;;
;; Permission to use, copy, modify, and/or distribute this software
;; for any purpose with or without fee is hereby granted, provided
;; that the above copyright notice and this permission notice appear
;; in all copies.
;;
;; THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
;; WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
;; WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
;; AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
;; CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
;; OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
;; NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
;; CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
;;
(declare (unit util-parser))
(module
util-parser
(
parser-preprocess-line
parser-tests!
)
(import scheme
(chicken irregex)
testing)
;; Pass 0: strips the comment (everything from the first `#` to the
;; end of the line) and then any leading and trailing spaces or tabs.
;;
;; line: string holding a single line of input
;; Returns the cleaned-up string; it is empty when the line held only
;; whitespace and/or a comment.
;;
;; The three patterns are compiled once, when this definition is
;; evaluated, and captured in the closure, so they are not recompiled
;; for every processed line.
(define parser-preprocess-line
  (let ((comment-irx (irregex "#.*$" 'u))
        (leading-ws-irx (irregex "^[ \\t]*" 'u))
        (trailing-ws-irx (irregex "[ \\t]*$" 'u)))
    (lambda (line)
      ;; Order matters: the comment is removed first so that whitespace
      ;; preceding it becomes trailing whitespace and gets trimmed.
      (let* ((without-comment (irregex-replace comment-irx line ""))
             (left-trimmed (irregex-replace leading-ws-irx
                                            without-comment
                                            "")))
        (irregex-replace trailing-ws-irx left-trimmed "")))))
;; Self-tests
;;
;; Exercises parser-preprocess-line on a handful of representative
;; input lines, grouped under the suite name `parser`. Takes no
;; arguments.
;;
;; NOTE(review): run-tests and test-equal? are presumably provided by
;; the imported `testing` module; each test-equal? form here appears to
;; be (label expression expected-value) -- confirm there how results
;; and failures are reported.
(define (parser-tests!)
(run-tests
parser
;; A line consisting solely of a comment reduces to the empty string.
(test-equal? parser-preprocess-line
(parser-preprocess-line "# all comment")
"")
;; Whitespace in front of a comment-only line is removed as well.
(test-equal? parser-preprocess-line
(parser-preprocess-line " # all comment after spaces")
"")
;; Content surrounded by whitespace and followed by a comment: only
;; the content itself remains.
(test-equal? parser-preprocess-line
(parser-preprocess-line " test # spaces and comment after spaces")
"test")
;; Whitespace inside the content is kept; only the whitespace before
;; the comment and the comment itself are dropped.
(test-equal? parser-preprocess-line
(parser-preprocess-line "key value # spaces and comment after spaces")
"key value")
))
)