Add io wrapper handling UTF-8 BOM.

This commit is contained in:
Dominik Pantůček 2023-04-08 20:35:41 +02:00
parent 6b8bffdd41
commit d72cde9c04
5 changed files with 80 additions and 9 deletions

55
src/util-io.scm Normal file
View file

@ -0,0 +1,55 @@
;;
;; util-io.scm
;;
;; I/O helpers for reading text that may start with a UTF-8 byte
;; order mark (BOM).
;;
;; ISC License
;;
;; Copyright 2023 Brmlab, z.s.
;; Dominik Pantůček <dominik.pantucek@trustica.cz>
;;
;; Permission to use, copy, modify, and/or distribute this software
;; for any purpose with or without fee is hereby granted, provided
;; that the above copyright notice and this permission notice appear
;; in all copies.
;;
;; THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
;; WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
;; WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
;; AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
;; CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
;; OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
;; NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
;; CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
;;
(declare (unit util-io))
(module
util-io
(
read-lines/no-bom
)
(import scheme
(chicken io))
;; Strips a leading UTF-8 byte order mark (BOM) from `str`.
;;
;; Returns the part of `str` after its first three characters when
;; those characters are exactly the BOM sequence.  In every other
;; case, including strings shorter than three characters, `str` is
;; returned unchanged.
(define (remove-optional-bom str)
  (let ((bom-length 3))
    (cond
     ;; Check the length first so that `substring` is never asked for
     ;; a range past the end of a short string.
     ((and (>= (string-length str) bom-length)
           (string=? (substring str 0 bom-length) "\xEF\xBB\xBF"))
      (substring str bom-length))
     (else str))))
;; Reads all lines from the input port `ip` using `read-lines` and
;; returns them as a list.  The first line is passed through
;; `remove-optional-bom`, so a BOM at the very start of the input does
;; not end up in the result; all following lines are kept untouched.
;; If no lines were read, the empty list is returned as is.
(define (read-lines/no-bom ip)
  (let ((all-lines (read-lines ip)))
    (cond
     ((null? all-lines) all-lines)
     (else
      ;; Only the first line can carry the marker.
      (let ((first-line (car all-lines))
            (remaining (cdr all-lines)))
        (cons (remove-optional-bom first-line) remaining))))))
)