mirror of
https://github.com/brmlab/povoden.git
synced 2025-06-08 02:04:01 +02:00
using xpath to scrape web
This commit is contained in:
parent
55f9618a24
commit
323cf99b79
2 changed files with 14 additions and 3 deletions
14
Makefile
14
Makefile
|
@ -1,12 +1,22 @@
|
|||
# Tools used by the recipes below; override on the command line if needed,
# e.g. `make HAML=/path/to/haml`.
HAML    := haml
XMLLINT := xmllint
|
||||
|
||||
# Default goal: render both site pages from their HAML sources.
# `all` names a command, not a file, so it is declared phony; otherwise a
# stray file called `all` would make this target look permanently up to date.
.PHONY: all
all: index.html doprava.html
|
||||
|
||||
# Download the station 307225 detail page and keep only lines 153-302 of the
# raw HTML as data/table.html.
# NOTE(review): a second `refresh` rule with prerequisites `get scrap` appears
# later in this listing, and the hunk header (-1,12 +1,22) suggests this one is
# the pre-change version -- confirm before keeping both.
refresh:
	curl 'http://vvv.chmi.cz/hydro/detail_stanice/307225.html' \
		| sed -n '153,302 p' > data/table.html
|
||||
# Download the raw inputs for station 307225 into data/:
#   data/307225.html  - station detail page
#   data/stav.png     - the "H" graph image
#   data/prutok.png   - the "Q" graph image
# `get` is a command, not a file, so it is declared phony.
.PHONY: get
get:
	@mkdir -p data
	# --fail makes curl exit non-zero on an HTTP error, so make stops here
	# instead of handing an error page to the scraping step.
	curl --fail 'http://vvv.chmi.cz/hydro/detail_stanice/307225.html' > data/307225.html
	wget 'http://vvv.chmi.cz/hydro/graph/big/307225_H.png' -O data/stav.png
	wget 'http://vvv.chmi.cz/hydro/graph/big/307225_Q.png' -O data/prutok.png
|
||||
|
||||
# Extract fragments from the downloaded station page with XPath.
# Depends on `get`, which produces data/307225.html.
#   1. Re-encode the page to UTF-8 in place.
#   2. Text of the second <p> in div.box/div.cont  -> data/timestamp.html
#   3. Rows after the first of the selected table  -> data/table.html
# Uses $(XMLLINT) (defined at the top of this Makefile) so the tool can be
# overridden consistently, and is declared phony because it names a command.
.PHONY: scrap
scrap: get
	$(XMLLINT) --html --encode utf8 data/307225.html --output data/307225.html
	$(XMLLINT) --html --xpath '//div[@class="box"]/div[@class="cont"]/p[2]/text()' data/307225.html > data/timestamp.html
	$(XMLLINT) --html --xpath '//div[@class="box"]/div[@class="cont"]/table[2]//table[5]/tr[position()>1]' data/307225.html > data/table.html
# Disabled extractions of two further tables (legend and info), which were
# piped through iconv cp1250 -> utf8:
# xmllint --html --xpath '//div[@class="box"]/div[@class="cont"]/table[2]//table[3]//tr' data/307225.html | iconv -f cp1250 -t utf8 > data/legend.html
# xmllint --html --xpath '//div[@class="box"]/div[@class="cont"]/table[2]//table[1]//tr' data/307225.html | iconv -f cp1250 -t utf8 > data/info.html
|
||||
|
||||
# Re-download the source data and re-run the scraping step.
# Declared phony: `refresh` is a command, not a file to be built.
.PHONY: refresh
refresh: get scrap
|
||||
|
||||
# Render any HAML template to the HTML file of the same name.
# The output goes to a temporary file first and is renamed only after haml
# succeeds, so a failed render cannot leave a partial target that make would
# then treat as up to date.
%.html: %.haml
	$(HAML) $< $@.tmp
	mv -f $@.tmp $@
|
||||
|
||||
|
|
|
@ -13,7 +13,8 @@
|
|||
%li <a href='doprava.html'>Doprava</a>
|
||||
|
||||
.content
|
||||
|
||||
%p
|
||||
=File.read('data/timestamp.html')
|
||||
%table.twocolumn
|
||||
%tr
|
||||
%td(style="padding: 16px;")
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue