Implement tokenizer and parser.
This commit is contained in:
parent
3c166fddb1
commit
a00b0f6239
1 changed files with 48 additions and 5 deletions
|
@ -33,13 +33,56 @@
|
|||
|
||||
(import scheme
|
||||
(chicken base)
|
||||
(chicken keyword))
|
||||
(chicken keyword)
|
||||
(chicken io)
|
||||
(chicken irregex))
|
||||
|
||||
;; Curried CSV line parser with configurable cell separator and string
;; delimiter.
;;
;; (make-csv-line-parser separator string-delimiter) returns a procedure
;; that takes one line (a string) and returns the list of its cells as
;; strings, in the order they appear on the line.
;;
;; Both separator and string-delimiter must be one-character strings,
;; because the line is tokenized into single (Unicode) characters and
;; every token is compared against them with equal?.
;;
;; Parsing rules:
;;  - An empty line yields the empty list.
;;  - Empty cells are preserved, including a trailing one ("a;" is the
;;    two cells "a" and "").
;;  - A cell starting with the string delimiter is quoted: separators
;;    inside it are ordinary data, and a doubled string delimiter stands
;;    for one literal delimiter character.
;;  - Anything following the closing delimiter up to the next separator
;;    is appended to the cell as ordinary unquoted data.
(define ((make-csv-line-parser separator string-delimiter) line)
  ;; Appends token to the cell currently being built (the head of res).
  (define (extend-cell res token)
    (cons (string-append (car res) token) (cdr res)))
  (let loop ((tokens (irregex-extract (irregex "." 'u) line))
             (res '())                  ; cells collected so far, newest first
             (state 'cell-start))
    (if (null? tokens)
        ;; Ending in cell-start with at least one cell collected means the
        ;; last token was a separator, so one more empty cell follows it.
        (reverse
         (if (and (eq? state 'cell-start) (pair? res))
             (cons "" res)
             res))
        (let ((token (car tokens))
              (more (cdr tokens)))
          (case state
            ((cell-start) ; Starting a new cell - check for string delimiter
             (cond ((equal? token string-delimiter)
                    ;; Quoted cell: keep it even if it stays empty
                    (loop more (cons "" res) 'quoted))
                   ((equal? token separator)
                    ;; Empty cell, and another new cell is awaiting
                    (loop more (cons "" res) 'cell-start))
                   (else
                    ;; First token of a regular unquoted cell
                    (loop more (cons token res) 'unquoted))))
            ((unquoted) ; Regular cell data - separator starts a new cell
             (if (equal? token separator)
                 (loop more res 'cell-start)
                 (loop more (extend-cell res token) 'unquoted)))
            ((quoted) ; Quoted cell data - separators are plain data here
             (if (equal? token string-delimiter)
                 (loop more res 'quote-seen)
                 (loop more (extend-cell res token) 'quoted)))
            ((quote-seen) ; Delimiter seen inside quoted cell: end or escape?
             (cond ((equal? token string-delimiter)
                    ;; Doubled delimiter: literal delimiter, still quoted
                    (loop more (extend-cell res token) 'quoted))
                   ((equal? token separator)
                    (loop more res 'cell-start))
                   (else
                    ;; Stray data after the closing delimiter is kept as
                    ;; ordinary unquoted data of the same cell
                    (loop more (extend-cell res token) 'unquoted)))))))))
|
||||
|
||||
;; Loads the CSV file named fn and parses each of its lines with a
;; parser created by make-csv-line-parser. Returns a list with one
;; element per line of the file, in file order; each element is
;; whatever the line parser returns for that line.
;;
;; Optional keyword arguments:
;;   #:separator         cell separator, defaults to ";"
;;   #:string-delimiter  quoting string, defaults to "\""
(define (csv-parse fn . args)
  (let* ((separator (get-keyword #:separator args (lambda () ";")))
         (string-delimiter (get-keyword #:string-delimiter args (lambda () "\"")))
         ;; call-with-input-file closes the port once read-lines returns,
         ;; so the file handle is not left open.
         (lines (call-with-input-file fn read-lines))
         (csv-parse-line (make-csv-line-parser separator string-delimiter)))
    (map csv-parse-line lines)))
|
||||
|
||||
)
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue