Heavily optimize CSV reader.
This commit is contained in:
parent
a216b27825
commit
306cca2ae3
1 changed files with 18 additions and 15 deletions
|
@ -41,49 +41,52 @@
|
||||||
testing
|
testing
|
||||||
progress)
|
progress)
|
||||||
|
|
||||||
;; Curry version of line parser with configurable cell separator and
|
;; Curry version of line parser with configurable cell separator and
|
||||||
;; string delimiter. Returns a list of lists of strings.
|
;; string delimiter. Returns a list of lists of strings.
|
||||||
(define ((make-csv-line-parser separator string-delimiter) line)
|
(define ((make-csv-line-parser separator string-delimiter) line)
|
||||||
(let loop ((tokens (irregex-extract (irregex "." 'u) line))
|
(let loop ((tokens (string->list line))
|
||||||
(res '())
|
(res '())
|
||||||
(state 1))
|
(state 1))
|
||||||
(if (null? tokens)
|
(if (null? tokens)
|
||||||
(reverse res)
|
(reverse
|
||||||
|
(map
|
||||||
|
(lambda (cell)
|
||||||
|
(list->string (reverse cell)))
|
||||||
|
res))
|
||||||
(let ((token (car tokens)))
|
(let ((token (car tokens)))
|
||||||
(case state
|
(case state
|
||||||
((0) ; Parsing regular unquoted cell data - separator creates new cell
|
((0) ; Parsing regular unquoted cell data - separator creates new cell
|
||||||
(if (equal? token separator)
|
(if (eq? token separator)
|
||||||
(loop (cdr tokens)
|
(loop (cdr tokens)
|
||||||
res
|
res
|
||||||
1) ; Start a new cell
|
1) ; Start a new cell
|
||||||
(loop (cdr tokens)
|
(loop (cdr tokens)
|
||||||
(cons (string-append (car res) token) (cdr res))
|
(cons (cons token (car res)) (cdr res))
|
||||||
0)))
|
0)))
|
||||||
((1) ; Starting a new cell - check for string delimiter
|
((1) ; Starting a new cell - check for string delimiter
|
||||||
(if (equal? token string-delimiter)
|
(if (eq? token string-delimiter)
|
||||||
(loop (cdr tokens)
|
(loop (cdr tokens)
|
||||||
(cons "" res) ; If it is quoted, keep even empty strings there
|
(cons '() res) ; If it is quoted, keep even empty strings there
|
||||||
2)
|
2)
|
||||||
(if (equal? token separator)
|
(if (eq? token separator)
|
||||||
(loop (cdr tokens)
|
(loop (cdr tokens)
|
||||||
(cons "" res) ; This was an empty cell
|
(cons '() res) ; This was an empty cell
|
||||||
1) ; Another new cell awaiting
|
1) ; Another new cell awaiting
|
||||||
(loop (cdr tokens)
|
(loop (cdr tokens)
|
||||||
(cons token res) ; first token of regular new cell
|
(cons (list token) res) ; first token of regular new cell
|
||||||
0))))
|
0))))
|
||||||
((2) ; Parsing quoted cell data - no support for escaping string delimiter!
|
((2) ; Parsing quoted cell data - no support for escaping string delimiter!
|
||||||
(if (equal? token string-delimiter)
|
(if (eq? token string-delimiter)
|
||||||
(loop (cdr tokens)
|
(loop (cdr tokens)
|
||||||
res
|
res
|
||||||
0) ; There shouldn't be anything more, but it is safe to append the rest as normal unquoted data
|
0) ; There shouldn't be anything more, but it is safe to append the rest as normal unquoted data
|
||||||
(loop (cdr tokens)
|
(loop (cdr tokens)
|
||||||
(cons (string-append (car res) token) (cdr res))
|
(cons (cons token (car res)) (cdr res))
|
||||||
2)))))))) ; Continue inside quoted data
|
2)))))))) ; Continue inside quoted data
|
||||||
|
|
||||||
;; Parses given CSV lines list
|
;; Parses given CSV lines list
|
||||||
(define (csv-parse-lines lines . args)
|
(define (csv-parse-lines lines . args)
|
||||||
(let* ((separator (get-keyword #:separator args (lambda () ";")))
|
(let* ((separator (get-keyword #:separator args (lambda () #\;)))
|
||||||
(string-delimiter (get-keyword #:string-delimiter args (lambda () "\"")))
|
(string-delimiter (get-keyword #:string-delimiter args (lambda () #\")))
|
||||||
(csv-parse-line (make-csv-line-parser separator string-delimiter))
|
(csv-parse-line (make-csv-line-parser separator string-delimiter))
|
||||||
(total (max (sub1 (length lines)) 1)))
|
(total (max (sub1 (length lines)) 1)))
|
||||||
(let loop ((lines lines)
|
(let loop ((lines lines)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue