mirror of
https://github.com/brmlab/povoden.git
synced 2025-06-08 18:24:00 +02:00
using xpath to scrape web
This commit is contained in:
parent
55f9618a24
commit
323cf99b79
2 changed files with 14 additions and 3 deletions
14
Makefile
14
Makefile
|
@ -1,12 +1,22 @@
|
||||||
HAML=haml
|
HAML=haml
|
||||||
|
XMLLINT=xmllint
|
||||||
|
|
||||||
all: index.html doprava.html
|
all: index.html doprava.html
|
||||||
|
|
||||||
refresh:
|
get:
|
||||||
curl 'http://vvv.chmi.cz/hydro/detail_stanice/307225.html' | sed -n '153,302 p' > data/table.html
|
curl 'http://vvv.chmi.cz/hydro/detail_stanice/307225.html' > data/307225.html
|
||||||
wget 'http://vvv.chmi.cz/hydro/graph/big/307225_H.png' -O data/stav.png
|
wget 'http://vvv.chmi.cz/hydro/graph/big/307225_H.png' -O data/stav.png
|
||||||
wget 'http://vvv.chmi.cz/hydro/graph/big/307225_Q.png' -O data/prutok.png
|
wget 'http://vvv.chmi.cz/hydro/graph/big/307225_Q.png' -O data/prutok.png
|
||||||
|
|
||||||
|
scrap: get
|
||||||
|
xmllint --html --encode utf8 data/307225.html --output data/307225.html
|
||||||
|
xmllint --html --xpath '//div[@class="box"]/div[@class="cont"]/p[2]/text()' data/307225.html > data/timestamp.html
|
||||||
|
xmllint --html --xpath '//div[@class="box"]/div[@class="cont"]/table[2]//table[5]/tr[position()>1]' data/307225.html > data/table.html
|
||||||
|
# xmllint --html --xpath '//div[@class="box"]/div[@class="cont"]/table[2]//table[3]//tr' data/307225.html | iconv -f cp1250 -t utf8 > data/legend.html
|
||||||
|
# xmllint --html --xpath '//div[@class="box"]/div[@class="cont"]/table[2]//table[1]//tr' data/307225.html | iconv -f cp1250 -t utf8 > data/info.html
|
||||||
|
|
||||||
|
refresh: get scrap
|
||||||
|
|
||||||
%.html: %.haml
|
%.html: %.haml
|
||||||
$(HAML) $< $@
|
$(HAML) $< $@
|
||||||
|
|
||||||
|
|
|
@ -13,7 +13,8 @@
|
||||||
%li <a href='doprava.html'>Doprava</a>
|
%li <a href='doprava.html'>Doprava</a>
|
||||||
|
|
||||||
.content
|
.content
|
||||||
|
%p
|
||||||
|
=File.read('data/timestamp.html')
|
||||||
%table.twocolumn
|
%table.twocolumn
|
||||||
%tr
|
%tr
|
||||||
%td(style="padding: 16px;")
|
%td(style="padding: 16px;")
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue