161 lines
4.6 KiB
Scheme
161 lines
4.6 KiB
Scheme
;;
|
|
;; util-csv.scm
|
|
;;
|
|
;; Simple, incomplete and incorrect but fast CSV loader.
|
|
;;
|
|
;; ISC License
|
|
;;
|
|
;; Copyright 2023 Brmlab, z.s.
|
|
;; Dominik Pantůček <dominik.pantucek@trustica.cz>
|
|
;;
|
|
;; Permission to use, copy, modify, and/or distribute this software
|
|
;; for any purpose with or without fee is hereby granted, provided
|
|
;; that the above copyright notice and this permission notice appear
|
|
;; in all copies.
|
|
;;
|
|
;; THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
|
|
;; WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
|
|
;; WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
|
|
;; AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
|
|
;; CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
|
;; OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
|
|
;; NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
|
|
;; CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|
;;
|
|
|
|
(declare (unit util-csv))
|
|
|
|
(import duck)
|
|
|
|
(module*
|
|
util-csv
|
|
#:doc ("This module provides a very simple, incomplete and incorrect but fast
|
|
CSV loader.")
|
|
(
|
|
make-csv-line-parser
|
|
csv-parse
|
|
csv-split-header
|
|
csv-simple-tests!
|
|
)
|
|
|
|
(import scheme
|
|
(chicken base)
|
|
(chicken keyword)
|
|
(chicken irregex)
|
|
(chicken condition)
|
|
testing
|
|
progress
|
|
util-io
|
|
racket-kwargs)
|
|
|
|
(define/doc ((make-csv-line-parser separator string-delimiter) line)
|
|
("Curried version of fast CSV line parser with given separator and string delimiter.
|
|
|
|
* ```separator``` - separator character
|
|
* ```string-delimiger``` - string quotation character
|
|
* ```line``` - line to parse")
|
|
(let loop ((tokens (string->list line))
|
|
(res '())
|
|
(state 1))
|
|
(if (null? tokens)
|
|
(reverse
|
|
(map
|
|
(lambda (cell)
|
|
(list->string (reverse cell)))
|
|
res))
|
|
(let ((token (car tokens)))
|
|
(case state
|
|
((0) ; Parsing regular unquoted cell data - separator creates new cell
|
|
(if (eq? token separator)
|
|
(loop (cdr tokens)
|
|
res
|
|
1) ; Start a new cell
|
|
(loop (cdr tokens)
|
|
(cons (cons token (car res)) (cdr res))
|
|
0)))
|
|
((1) ; Starting a new cell - check for string delimiter
|
|
(if (eq? token string-delimiter)
|
|
(loop (cdr tokens)
|
|
(cons '() res) ; If it is quoted, keep even empty strings there
|
|
2)
|
|
(if (eq? token separator)
|
|
(loop (cdr tokens)
|
|
(cons '() res) ; This was an empty cell
|
|
1) ; Another new cell awaiting
|
|
(loop (cdr tokens)
|
|
(cons (list token) res) ; first token of regular new cell
|
|
0))))
|
|
((2) ; Parsing quoted cell data - no support for escaping string delimiter!
|
|
(if (eq? token string-delimiter)
|
|
(loop (cdr tokens)
|
|
res
|
|
0) ; There shouldn't be anything more, but it is safe to append the rest as normal unquoted data
|
|
(loop (cdr tokens)
|
|
(cons (cons token (car res)) (cdr res))
|
|
2)))))))) ; Continue inside quoted data
|
|
|
|
(define*/doc (csv-parse-lines lines
|
|
#:separator (separator #\;)
|
|
#:string-delimiter (string-delimiter #\"))
|
|
("Parses given lines and returns list of lists of strings.")
|
|
(let* ((csv-parse-line (make-csv-line-parser separator string-delimiter))
|
|
(total (max (sub1 (length lines)) 1)))
|
|
(let loop ((lines lines)
|
|
(idx 0)
|
|
(res '()))
|
|
(if (null? lines)
|
|
(reverse res)
|
|
(let ((line (car lines)))
|
|
(progress%-advance (/ idx total))
|
|
(loop (cdr lines)
|
|
(add1 idx)
|
|
(cons (csv-parse-line line)
|
|
res)))))))
|
|
|
|
(define/doc (csv-parse fn . args)
|
|
("Uses ```csv-parse-lines``` on lines read from given file ```fn```.")
|
|
(call/cc
|
|
(lambda (ret)
|
|
(with-exception-handler
|
|
(lambda (ex)
|
|
(ret #f))
|
|
(lambda ()
|
|
(let ((lines (read-lines/no-bom (open-input-file fn))))
|
|
(apply csv-parse-lines lines args)))))))
|
|
|
|
(define/doc (csv-split-header csv)
|
|
("Splits given loaded CSV into two tables at the first empty row.")
|
|
(let loop ((body csv)
|
|
(rhead '()))
|
|
(if (null? body)
|
|
(list (reverse rhead) '())
|
|
(let ((row (car body)))
|
|
(if (null? row)
|
|
(list (reverse rhead)
|
|
(cdr body))
|
|
(loop (cdr body)
|
|
(cons row rhead)))))))
|
|
|
|
;; Module self-tests
|
|
(define (csv-simple-tests!)
|
|
(run-tests
|
|
csv-simple
|
|
(test-equal? csv-parse-line
|
|
((make-csv-line-parser #\; #\") "test;2;3")
|
|
'("test" "2" "3"))
|
|
(test-equal? csv-parse-line
|
|
((make-csv-line-parser #\; #\") "test;\"2;quoted\";3")
|
|
'("test" "2;quoted" "3"))
|
|
(test-equal? csv-split-header
|
|
(csv-split-header '((1 2) () (3 4)))
|
|
'(((1 2)) ((3 4))))
|
|
(test-equal? csv-split-header
|
|
(csv-split-header '((1 2) (5 6) (3 4)))
|
|
'(((1 2) (5 6) (3 4)) ()))
|
|
(test-equal? csv-parse-lines
|
|
(csv-parse-lines '("a;b;c" "1;2"))
|
|
'(("a" "b" "c") ("1" "2")))
|
|
))
|
|
|
|
)
|
|
|