;;
;; csv-simple.scm
;;
;; Simple, incomplete and incorrect but fast CSV loader.
;;
;; ISC License
;;
;; Copyright 2023 Brmlab, z.s.
;; Dominik Pantůček
;;
;; Permission to use, copy, modify, and/or distribute this software
;; for any purpose with or without fee is hereby granted, provided
;; that the above copyright notice and this permission notice appear
;; in all copies.
;;
;; THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
;; WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
;; WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
;; AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
;; CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
;; OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
;; NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
;; CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
;;

(declare (unit csv-simple))

(module
 csv-simple
 (
  csv-parse
  csv-simple-tests!
  )

 (import scheme
         (chicken base)
         (chicken keyword)
         (chicken io)
         (chicken irregex)
         testing)

 ;; Creates a line parser with configurable cell separator and string
 ;; delimiter. Both are compared against single-character tokens with
 ;; equal?, so they should be given as one-character strings.
 ;;
 ;; The returned procedure accepts one line (a string) and returns a
 ;; list of strings - one string per cell.
 ;;
 ;; The parser is a small state machine processing the line one
 ;; character at a time:
 ;;
 ;;   0 - inside regular unquoted cell data
 ;;   1 - at the start of a new cell
 ;;   2 - inside quoted cell data
 ;;
 ;; Known limitations:
 ;;
 ;; * The string delimiter cannot be escaped inside quoted data.
 ;; * A trailing empty cell is dropped: "a;" yields ("a"), because a
 ;;   new cell is added to the result only when its first character is
 ;;   seen. For the same reason an empty line yields '().
 (define (make-csv-line-parser separator string-delimiter)
   ;; The character-splitting regex does not depend on the line, so it
   ;; is compiled once per parser and not once per parsed line.
   (let ((char-rx (irregex "." 'u)))
     (lambda (line)
       (let loop ((tokens (irregex-extract char-rx line))
                  (res '())
                  (state 1))
         (if (null? tokens)
             (reverse res)
             (let ((token (car tokens)))
               (case state
                 ((0) ; Parsing regular unquoted cell data - separator creates new cell
                  (if (equal? token separator)
                      (loop (cdr tokens)
                            res
                            1) ; Start a new cell
                      (loop (cdr tokens)
                            (cons (string-append (car res) token) (cdr res))
                            0)))
                 ((1) ; Starting a new cell - check for string delimiter
                  (if (equal? token string-delimiter)
                      (loop (cdr tokens)
                            (cons "" res) ; If it is quoted, keep even empty strings there
                            2)
                      (if (equal? token separator)
                          (loop (cdr tokens)
                                (cons "" res) ; This was an empty cell
                                1) ; Another new cell awaiting
                          (loop (cdr tokens)
                                (cons token res) ; First token of regular new cell
                                0))))
                 ((2) ; Parsing quoted cell data - no support for escaping string delimiter!
                  (if (equal? token string-delimiter)
                      (loop (cdr tokens)
                            res
                            0) ; There shouldn't be anything more, but it is safe to append the rest as normal unquoted data
                      (loop (cdr tokens)
                            (cons (string-append (car res) token) (cdr res))
                            2))))))))) ; Continue inside quoted data

 ;; Loads given CSV file and parses its lines into lists. Returns a
 ;; list with one element per line of the file, each element being
 ;; the list of cell strings of that line.
 ;;
 ;; Accepted keyword arguments:
 ;;
 ;;   #:separator        - cell separator, defaults to ";"
 ;;   #:string-delimiter - quoting character, defaults to "\""
 (define (csv-parse fn . args)
   (let* ((separator (get-keyword #:separator args (lambda () ";")))
          (string-delimiter (get-keyword #:string-delimiter args (lambda () "\"")))
          ;; call-with-input-file closes the port once all the lines
          ;; are read, so the file descriptor is not left open.
          (lines (call-with-input-file fn read-lines))
          (csv-parse-line (make-csv-line-parser separator string-delimiter)))
     (map csv-parse-line lines)))

 ;; Module self-tests
 (define (csv-simple-tests!)
   (run-tests
    csv-simple
    (test-equal? csv-parse-line
                 ((make-csv-line-parser ";" "\"") "test;2;3")
                 '("test" "2" "3"))
    (test-equal? csv-parse-line
                 ((make-csv-line-parser ";" "\"") "test;\"2;quoted\";3")
                 '("test" "2;quoted" "3"))
    ))

 )