hackerbase/csv-simple.scm

102 lines
3.2 KiB
Scheme

;;
;; csv-simple.scm
;;
;; Simple, incomplete and incorrect but fast CSV loader.
;;
;; ISC License
;;
;; Copyright 2023 Brmlab, z.s.
;; Dominik Pantůček <dominik.pantucek@trustica.cz>
;;
;; Permission to use, copy, modify, and/or distribute this software
;; for any purpose with or without fee is hereby granted, provided
;; that the above copyright notice and this permission notice appear
;; in all copies.
;;
;; THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
;; WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
;; WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
;; AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
;; CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
;; OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
;; NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
;; CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
;;
(declare (unit csv-simple))
(module
csv-simple
(
csv-parse
csv-simple-tests!
)
(import scheme
(chicken base)
(chicken keyword)
(chicken io)
(chicken irregex)
testing)
;; Curry version of line parser with configurable cell separator and
;; string delimiter. Returns a list of lists of strings.
(define ((make-csv-line-parser separator string-delimiter) line)
(let loop ((tokens (irregex-extract (irregex "." 'u) line))
(res '())
(state 1))
(if (null? tokens)
(reverse res)
(let ((token (car tokens)))
(case state
((0) ; Parsing regular unquoted cell data - separator creates new cell
(if (equal? token separator)
(loop (cdr tokens)
res
1) ; Start a new cell
(loop (cdr tokens)
(cons (string-append (car res) token) (cdr res))
0)))
((1) ; Starting a new cell - check for string delimiter
(if (equal? token string-delimiter)
(loop (cdr tokens)
(cons "" res) ; If it is quoted, keep even empty strings there
2)
(if (equal? token separator)
(loop (cdr tokens)
(cons "" res) ; This was an empty cell
1) ; Another new cell awaiting
(loop (cdr tokens)
(cons token res) ; first token of regular new cell
0))))
((2) ; Parsing quoted cell data - no support for escaping string delimiter!
(if (equal? token string-delimiter)
(loop (cdr tokens)
res
0) ; There shouldn't be anything more, but it is safe to append the rest as normal unquoted data
(loop (cdr tokens)
(cons (string-append (car res) token) (cdr res))
2)))))))) ; Continue inside quoted data
;; Loads given CSV file and parses its lines into lists
(define (csv-parse fn . args)
(let* ((separator (get-keyword #:separator args (lambda () ";")))
(string-delimiter (get-keyword #:string-delimiter args (lambda () "\"")))
(lines (read-lines (open-input-file fn)))
(csv-parse-line (make-csv-line-parser separator string-delimiter)))
(map csv-parse-line lines)))
;; Module self-tests
(define (csv-simple-tests!)
(run-tests
csv-simple
(test-equal? csv-parse-line
((make-csv-line-parser ";" "\"") "test;2;3")
'("test" "2" "3"))
(test-equal? csv-parse-line
((make-csv-line-parser ";" "\"") "test;\"2;quoted\";3")
'("test" "2;quoted" "3"))
))
)