diff --git a/csv-simple.scm b/csv-simple.scm
index 8e99762..19ef1e1 100644
--- a/csv-simple.scm
+++ b/csv-simple.scm
@@ -33,13 +33,69 @@
 
   (import scheme
           (chicken base)
-          (chicken keyword))
+          (chicken keyword)
+          (chicken io)
+          (chicken irregex))
 
 
+  ;; Curried line parser with configurable cell separator and string
+  ;; delimiter, both given as one-character strings. The returned
+  ;; procedure takes one line of text and returns its cells as a list
+  ;; of strings. A line ending right after a separator yields a final
+  ;; empty cell; an empty line yields the empty list.
+  (define (make-csv-line-parser separator string-delimiter)
+    ;; Compile the one-character matcher once per parser, not per line.
+    (let ((any-char (irregex "." 'u)))
+      (lambda (line)
+        (let loop ((tokens (irregex-extract any-char line))
+                   (res '())
+                   (state 1))
+          (if (null? tokens)
+              ;; Ending in state 1 with cells already collected means the
+              ;; last token was a separator: emit the trailing empty cell.
+              (reverse (if (and (= state 1) (pair? res))
+                           (cons "" res)
+                           res))
+              (let ((token (car tokens)))
+                (case state
+                  ((0) ; Parsing regular unquoted cell data - separator creates new cell
+                   (if (equal? token separator)
+                       (loop (cdr tokens)
+                             res
+                             1) ; Start a new cell
+                       (loop (cdr tokens)
+                             (cons (string-append (car res) token) (cdr res))
+                             0)))
+                  ((1) ; Starting a new cell - check for string delimiter
+                   (if (equal? token string-delimiter)
+                       (loop (cdr tokens)
+                             (cons "" res) ; If it is quoted, keep even empty strings there
+                             2)
+                       (if (equal? token separator)
+                           (loop (cdr tokens)
+                                 (cons "" res) ; This was an empty cell
+                                 1) ; Another new cell awaiting
+                           (loop (cdr tokens)
+                                 (cons token res) ; first token of regular new cell
+                                 0))))
+                  ((2) ; Parsing quoted cell data - no support for escaping string delimiter!
+                   (if (equal? token string-delimiter)
+                       (loop (cdr tokens)
+                             res
+                             0) ; There shouldn't be anything more, but it is safe to append the rest as normal unquoted data
+                       (loop (cdr tokens)
+                             (cons (string-append (car res) token) (cdr res))
+                             2)))))))))) ; Continue inside quoted data
+
+  ;; Loads the given CSV file and parses each of its lines into a list of
+  ;; cell strings. Keyword arguments: #:separator (default ";") and
+  ;; #:string-delimiter (default "\"").
   (define (csv-parse fn . args)
-    (let ((separator (get-keyword #:separator args (lambda () ";")))
-          (string-delimiter (get-keyword #:string-delimiter args (lambda () "\"")))
-          (lines (read-lines (open-input-file fn))))
-      #f))
+    (let* ((separator (get-keyword #:separator args (lambda () ";")))
+           (string-delimiter (get-keyword #:string-delimiter args (lambda () "\"")))
+           ;; call-with-input-file closes the port once read-lines returns.
+           (lines (call-with-input-file fn read-lines))
+           (csv-parse-line (make-csv-line-parser separator string-delimiter)))
+      (map csv-parse-line lines)))
 
 )