hackerbase/fetch_fio.sh

309 lines
7.1 KiB
Bash

#!/bin/sh
#
# fetch_fio.sh
#
# Fio API account statements fetcher.
#
# ISC License
#
# Copyright 2023 Brmlab, z.s.
# Jan Hrach
# Dominik Pantůček <dominik.pantucek@trustica.cz>
#
# Permission to use, copy, modify, and/or distribute this software
# for any purpose with or without fee is hereby granted, provided
# that the above copyright notice and this permission notice appear
# in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
# WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
# AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
# CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
# NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#
# Base URI of the Fio REST API
APIURI="https://www.fio.cz/ib_api/rest"

# Default location of the configuration file.
# TODO: quick fix - this path is hard-coded; use -config to override it.
CONFIG_FILE="/home/hackerbase/.hackerbaserc"

# Values coming from the configuration file (empty until it is parsed)
CFG_BANK_DIR=""
CFG_APIKEYS_FILE=""

# Values coming from the command line (empty until arguments are parsed)
ARG_BANK_DIR=""
ARG_APIKEYS_FILE=""
# Argument parsing

# Prints the usage summary and terminates the script with status 1.
usage() {
  echo "Usage: $0 [-config file] [-apikey file] [-bankdir dir]"
  exit 1
}

# Parses the command-line options.
#
# Sets ARG_APIKEYS_FILE (-apikey), ARG_BANK_DIR (-bankdir) and
# CONFIG_FILE (-config). Every option takes exactly one value. An
# unknown argument or an option without its value prints the usage
# and exits with status 1.
#
# $@ - the script's command-line arguments
parse_args() {
  # Loop on the argument count rather than on "$1" being non-empty so
  # that an empty-string argument cannot silently end the parsing.
  while [ "$#" -gt 0 ] ; do
    case "$1" in
      -apikey|-bankdir|-config)
        # Make sure the value is really there before consuming it
        if [ "$#" -lt 2 ] ; then
          echo "Option $1 requires a value"
          usage
        fi
        ;;
      *)
        usage
        ;;
    esac
    case "$1" in
      -apikey)  ARG_APIKEYS_FILE="$2" ;;
      -bankdir) ARG_BANK_DIR="$2" ;;
      -config)  CONFIG_FILE="$2" ;;
    esac
    shift 2
  done
}

parse_args "$@"
# Configuration parsing
#
# Prints the value configured for the given parameter name, one output
# line per matching configuration line. In the configuration file
# everything from '#' to the end of a line is a comment, leading
# whitespace is ignored, the first whitespace-delimited word is the
# parameter name and the rest of the line (with surrounding whitespace
# trimmed) is its value. Prints nothing if CONFIG_FILE is not readable
# or the parameter is not present.
#
# $1 - parameter name (matched exactly, not as a prefix)
get_config_value() {
  if [ -z "$1" ] ; then
    # Goes to stderr: stdout of this function is captured as the value
    echo "get_config_value() requires parameter name" >&2
    exit 1
  fi
  if [ -r "$CONFIG_FILE" ] ; then
    awk -v key="$1" '
      {
        # Drop the comment and the leading whitespace
        sub(/#.*/, "")
        sub(/^[ \t]+/, "")
      }
      # Appending "" forces a string comparison of the parameter name
      ($1 "") == key {
        # Remove the parameter name, keep the trimmed value
        sub(/^[^ \t]+[ \t]*/, "")
        sub(/[ \t]+$/, "")
        print
      }
    ' "$CONFIG_FILE"
  fi
}
# Values from the configuration file
CFG_BANK_DIR=$(get_config_value bank-dir)
CFG_APIKEYS_FILE=$(get_config_value apikeys-file)

# Configuration merging - a non-empty command-line value takes
# precedence over the value from the configuration file
BANK_DIR="${ARG_BANK_DIR:-$CFG_BANK_DIR}"
APIKEYS_FILE="${ARG_APIKEYS_FILE:-$CFG_APIKEYS_FILE}"

# Refuse to continue without the mandatory settings: an empty BANK_DIR
# would make the script work in "/parts" and an empty APIKEYS_FILE
# cannot be read at all.
if [ -z "$BANK_DIR" ] ; then
  echo "$0: bank directory not set (use -bankdir or bank-dir in $CONFIG_FILE)" >&2
  exit 1
fi
if [ -z "$APIKEYS_FILE" ] ; then
  echo "$0: API keys file not set (use -apikey or apikeys-file in $CONFIG_FILE)" >&2
  exit 1
fi
if ! [ -r "$APIKEYS_FILE" ] ; then
  echo "$0: API keys file $APIKEYS_FILE is not readable" >&2
  exit 1
fi

# Storage for partial account statements
BANK_DIR_PARTS="$BANK_DIR/parts"
if ! [ -d "$BANK_DIR_PARTS" ] ; then
  if ! mkdir -p "$BANK_DIR_PARTS" ; then
    echo "$0: cannot create directory $BANK_DIR_PARTS" >&2
    exit 1
  fi
fi
#
# Minimal "logging" helper: writes the current date and time followed
# by all of its arguments as a single line to stdout (which is expected
# to be redirected to a log file by the caller of the script).
log() {
  echo "$(date '+%Y-%m-%d %H:%M:%S')" "$@"
}
#
# Prints the modification date of a file as YYYY-MM-DD.
# A file that is not readable is reported as 2000-01-01.
# $1 - path to the file
get_file_date() {
  if ! [ -r "$1" ] ; then
    echo 2000-01-01
    return
  fi
  # "stat -c %y" prints "date time zone"; keep only the date part
  STAT=$(stat -c %y "$1")
  echo "${STAT%% *}"
}
#
# Downloads a URI into a file, making up to three attempts.
#
# The data is first stored in "<output file>.tmp" and moved into place
# only if the download succeeded and produced a non-empty file. An
# already existing output file is copied to "<output file>.old" before
# it is replaced. After every unsuccessful attempt the function waits
# for 5 seconds.
#
# $1 - URI
# $2 - output file
#
# Returns 0 if the output file was updated, 1 otherwise.
download_file() {
  url="$1"
  fname="$2"
  tmpfname="$fname.tmp"
  oldfname="$fname.old"
  for i in 1 2 3 ; do
    if wget -q "$url" -O "$tmpfname" ; then
      if [ -s "$tmpfname" ] ; then
        log Download OK
        if [ -r "$fname" ] ; then
          cp "$fname" "$oldfname"
        fi
        if mv "$tmpfname" "$fname" ; then
          log Rename OK
          return 0
        fi
        log Rename failed
        return 1
      fi
      log Download successfull but empty or non-existing result.
      log Retrying in 5 s.
    else
      log Failed download, retrying in 5 s.
    fi
    # Both failure modes wait before the next attempt
    sleep 5
  done
  log Giving up after 3 attempts.
  # Do not leave an empty or partial temporary file behind
  rm -f "$tmpfname"
  return 1
}
#
# Downloads the transactions of a single calendar year (January 1st to
# December 31st) in CSV format.
# $1 - apikey
# $2 - year
# $3 - destination file name
download_year() {
  # Assign the arguments first so that the log line below describes
  # this very call and not values left over from a previous one
  apikey="$1"
  year="$2"
  fname="$3"
  # The log line shows "..." in place of the API key
  log "Downloading ${APIURI}/periods/.../$year-01-01/$year-12-31/transactions.csv to $fname"
  download_file "${APIURI}/periods/$apikey/$year-01-01/$year-12-31/transactions.csv" "$fname"
}
#
# Gets the first yearly part of the account statement.
#
# Prints the bare file name (without directory) of the first file in
# sort order among "<account number>-<digits>.csv" files stored in
# BANK_DIR_PARTS, or nothing if the account has no parts. The account
# number must match exactly: parts of an account whose number merely
# starts with the given one are not considered.
#
# $1 - account number
first_acc_part() {
  accno="$1"
  ls "${BANK_DIR_PARTS}/" \
    | grep "^${accno}-[0-9][0-9]*\.csv\$" \
    | sort \
    | head -n 1
}
#
# Gets the last yearly part of the account statement.
#
# Prints the bare file name (without directory) of the last file in
# sort order among "<account number>-<digits>.csv" files stored in
# BANK_DIR_PARTS, or nothing if the account has no parts. The account
# number must match exactly: parts of an account whose number merely
# starts with the given one are not considered.
#
# $1 - account number
last_acc_part() {
  accno="$1"
  ls "${BANK_DIR_PARTS}/" \
    | grep "^${accno}-[0-9][0-9]*\.csv\$" \
    | sort -r \
    | head -n 1
}
#
# Returns all parts except for the first
#
# Prints, one per line and in sort order, the bare file names of the
# "<account number>-<digits>.csv" files stored in BANK_DIR_PARTS,
# leaving out the first one. Prints nothing if the account has at most
# one part. The account number must match exactly.
#
# $1 - account number
all_but_first_parts() {
  accno="$1"
  # "tail -n +2" starts the output at the second line of the sorted list
  ls "${BANK_DIR_PARTS}/" \
    | grep "^${accno}-[0-9][0-9]*\.csv\$" \
    | sort \
    | tail -n +2
}
#
# Creates the static part of the statement header.
#
# Prints the non-empty lines found within the 20 lines preceding an
# empty line of the given file, leaving out every line that contains
# "Balance" or starts with "date" or "id" (those are produced
# separately by make_acc_header).
#
# $1 - path to the statement file
make_acc_header_static() {
  # "grep -E" replaces the obsolescent "egrep" command
  grep -B 20 '^$' "$1" \
    | grep . \
    | grep -E -v 'Balance|^date|^id'
}
#
# Prints every line of a file that starts with the given header name.
# The exit status is that of grep.
# $1 - path to file
# $2 - header name
get_header_field() {
  grep -e "^${2}" "${1}"
}
#
# Creates the complete header of the merged account statement.
#
# The static lines as well as openingBalance, dateStart and idFrom are
# taken from the first part of the account, closingBalance, dateEnd
# and idTo from its last part.
#
# $1 - account number
make_acc_header() {
  first="`first_acc_part "$1"`"
  # The last part must be looked up for the same account as the first
  last="`last_acc_part "$1"`"
  firstfname="$BANK_DIR_PARTS/$first"
  lastfname="$BANK_DIR_PARTS/$last"
  make_acc_header_static "$firstfname"
  get_header_field "$firstfname" openingBalance
  get_header_field "$lastfname" closingBalance
  get_header_field "$firstfname" dateStart
  get_header_field "$lastfname" dateEnd
  get_header_field "$firstfname" idFrom
  get_header_field "$lastfname" idTo
}
#
# Creates the complete merged account statement on stdout.
#
# The output consists of the header produced by make_acc_header, then
# everything from the first empty line to the end of the first part,
# then from each further part all lines following its first line that
# starts with "ID" (lines starting with "ID" themselves are left out).
#
# $1 - account number
#
# Returns 1 without producing any output if the account has no parts,
# 0 otherwise.
merge_acc() {
  first_part="`first_acc_part "$1"`"
  if [ -z "$first_part" ] ; then
    # stdout carries the statement, so the message goes to stderr
    log "No statement parts found for account $1" >&2
    return 1
  fi
  make_acc_header "$1"
  # sed address ranges have no upper limit on the number of lines
  sed -n '/^$/,$p' "$BANK_DIR_PARTS/$first_part"
  for part in `all_but_first_parts "$1"` ; do
    sed -n '/^ID/,$p' "$BANK_DIR_PARTS/$part" \
      | grep -v '^ID'
  done
  return 0
}
# Current year (last in seq)
CYEAR=$(date +%Y)

# Mark
log "$0" ======== started ========

# Each line of the API keys file should contain the account number,
# the Fio API token and the starting year for this account as its
# first three whitespace-separated (spaces or tabs) tokens. The rest
# of each line is ignored. Empty lines and lines whose first token
# starts with '#' are skipped.
#
# For every account all yearly parts from the starting year up to the
# current year are fetched as needed and then merged into
# "$BANK_DIR/<account number>.csv".
while read -r ACCNO APIKEY YEAR IGNORED || [ -n "$ACCNO" ] ; do
  # Skip empty lines and comments
  case "$ACCNO" in
    ''|'#'*) continue ;;
  esac

  # Check starting year before attempting partial downloads
  if [ -z "$YEAR" ] ; then
    log "Missing start year for account $ACCNO"
    continue
  fi
  case "$YEAR" in
    *[!0-9]*)
      # The numeric comparisons below need a plain number
      log "Invalid start year for account $ACCNO: $YEAR"
      continue
      ;;
  esac
  if [ "$YEAR" -gt "$CYEAR" ] ; then
    log "Start year for account $ACCNO in the future: $YEAR"
    continue
  fi
  if [ "$YEAR" -lt "2010" ] ; then
    log "Start year for account $ACCNO before Brmlab existence: $YEAR"
    continue
  fi

  # Iterate over years
  for year in $(seq "$YEAR" "$CYEAR") ; do
    # Check whether it needs fetching: a part is final only if it was
    # downloaded on January 2nd of the following year or later
    CSVNAME="$BANK_DIR_PARTS/$ACCNO-$year.csv"
    FILE_DATE=$(get_file_date "$CSVNAME")
    NEXT_YEAR=$((year + 1))
    MIN_DATE="$NEXT_YEAR-01-02"
    FILE_TS=$(date -d "$FILE_DATE" +%s)
    MIN_TS=$(date -d "$MIN_DATE" +%s)
    if [ "$FILE_TS" -lt "$MIN_TS" ] ; then
      log "Update $ACCNO in $year - updated $FILE_DATE, needs $MIN_DATE"
      download_year "$APIKEY" "$year" "$CSVNAME"
      if [ "$year" -lt "$CYEAR" ] ; then
        log "Sleeping for 30s before next API usage"
        sleep 30
      fi
    else
      log "Skipping $ACCNO in $year - already latest: $FILE_DATE"
    fi
  done

  # Merge the account
  log "Merging $ACCNO"
  merge_acc "$ACCNO" >"$BANK_DIR/$ACCNO.csv.tmp"
  if [ -s "$BANK_DIR/$ACCNO.csv.tmp" ] ; then
    log "Renaming $ACCNO"
    if [ -r "$BANK_DIR/$ACCNO.csv" ] ; then
      cp "$BANK_DIR/$ACCNO.csv" "$BANK_DIR/$ACCNO.csv.old"
    fi
    mv "$BANK_DIR/$ACCNO.csv.tmp" "$BANK_DIR/$ACCNO.csv"
  else
    # Never replace an existing statement with an empty merge result
    log "Merged statement for $ACCNO is empty, keeping the previous one"
    rm -f "$BANK_DIR/$ACCNO.csv.tmp"
  fi
done < "$APIKEYS_FILE"

# Mark
log "$0" ======== finished ========