Beautiful Soup HTML parsing
The following Python code fetches the windspeed web page, extracts the timestamp, average windspeed, direction and gust speed of each sample, and writes the data to a date-stamped file named, for example, /home/user/wind_data/windspeed_2015-04-21-12.txt (the stamp is year-month-day-hour). Schedule a cron job to run it every day, for example at midnight. The windspeed file for a particular day can then be selected and processed by graph.py.
#!/usr/bin/python
import os
import requests
import time
from bs4 import BeautifulSoup
# Scrape the wind table and store one table cell per line in a file stamped
# with the local date and hour, e.g. ~/wind_data/windspeed_2015-04-21-12.txt.
date_stamp = time.strftime('%Y-%m-%d-%H', time.localtime())
outfile = os.path.join(os.path.expanduser('~'), 'wind_data',
                       "windspeed_%s.txt" % date_stamp)

# Download and parse BEFORE opening the output file, so a failed fetch or an
# unexpected page layout does not leave an empty data file behind.
# The timeout keeps an unattended (cron) run from hanging forever on a dead
# connection; raise_for_status() stops us from parsing an HTTP error page.
r = requests.get("http://xxxxx.wwww.yyyyy", timeout=30)
r.raise_for_status()

# Name the parser explicitly: otherwise bs4 uses whichever parser happens to
# be installed, which can give different trees on different machines.
soup = BeautifulSoup(r.content, "html.parser")
table = soup.find("table", {"id": "grid"})
if table is None:
    # Fail with a readable message instead of an AttributeError on None.
    raise SystemExit('no <table id="grid"> found in the fetched page')

# Collect the text of every <td>, row by row, in document order.
cells = []
for row in table.findAll('tr'):
    for cell in row.findAll('td'):
        cells.append(cell.getText())

# `with` closes the file even if a write fails part-way through.
with open(outfile, 'w') as f:
    for item in cells:
        f.write("%s\n" % item)
The following Python program (graph.py) graphs the data from the windspeed text file.
#!/usr/bin/python
# This program asks for the date reference of the file created by the
# scraping program hha.py. That program stores the scraped data in a
# file named, for example, windspeed_2015-04-21.txt (enter exactly the
# text that follows "windspeed_" in the file name).
# The scraped data holds four lines per sample, each ending in a newline
# (\n): date and time, average windspeed, wind direction, gust speed.
# For example:
# 21/04/15 22:10
# 7.19kt
# 40.10deg
# 11.46kt
# 21/04/15 22:00
# 5.44kt
# 32.70deg
# 10.88kt
# 21/04/15 21:50
# 6.41kt
# 40.40deg
# 10.88kt
import numpy as np
import matplotlib.pyplot as plt
# Read one day's scraped samples and split them into parallel series.
#
# The data file holds four lines per sample (date+time, average speed,
# direction, gust speed). Results, one entry per sample:
#   timestr   - 'HH:MM' strings, in file order
#   time      - the same times as floats (10:20 -> 10.2), in file order
#   wind_ave  - average speed, reversed relative to file order
#   direction - direction in degrees, reversed relative to file order
#   gust      - gust speed, reversed relative to file order


def _reading(text, unit):
    """Return the number in *text* after dropping its trailing *unit* label.

    Surrounding whitespace (including the line terminator) is ignored, so
    the result does not depend on the exact width of the line ending.
    Raises ValueError if what remains is not a number.
    """
    text = text.strip()
    if text.endswith(unit):
        text = text[:-len(unit)]
    return float(text)


# Python 2 calls the line-reading prompt raw_input(); Python 3 renamed it
# to input(). Pick whichever exists so the script runs on both.
try:
    prompt = raw_input
except NameError:
    prompt = input

date = prompt("Enter date as yyyy-mm-dd ")
data_file = 'windspeed_%s.txt' % date  # relative path: looked up in the cwd

# `with` closes the file as soon as the lines have been read.
with open(data_file, 'r') as src:
    lines = [text.strip() for text in src]

# Drop an incomplete trailing sample (e.g. stray blank lines at the end) so
# that every series below ends up with the same length.
lines = lines[:len(lines) - len(lines) % 4]

# Line 0 of each sample is 'dd/mm/yy HH:MM'; keep only the 'HH:MM' part.
timestr = [stamp[-5:] for stamp in lines[0::4]]

# Numeric form of the time with the colon replaced by a point. Note that
# 10:20 becomes 10.2, which is not a decimal-hour value.
time = [float(hhmm.replace(':', '.')) for hhmm in timestr]

# Every measured series is reversed relative to file order. `direction` is
# reversed together with `wind_ave` so that entry i of each list belongs to
# the same sample; leaving it unreversed would pair each direction with the
# speed of a different sample when the two are plotted against each other.
wind_ave = [_reading(value, 'kt') for value in lines[1::4]][::-1]
direction = [_reading(value, 'deg') for value in lines[2::4]][::-1]
gust = [_reading(value, 'kt') for value in lines[3::4]][::-1]
# Line plot of gust speed and average speed against sample number.

# One x position per sample.
p = range(len(time))

# Only the three-hourly samples get a visible tick label; every other tick
# is given a single space so the axis stays readable.
labelled_times = ('00:00', '03:00', '06:00', '09:00', '12:00',
                  '15:00', '18:00', '21:00', '24:00')
timelabel = [stamp if stamp in labelled_times else ' ' for stamp in timestr]

# timestr is in file order, whereas gust and wind_ave were reversed when
# they were parsed; reverse the labels so they line up with the data.
timelabel = timelabel[::-1]

plt.xticks(p, timelabel)
plt.plot(p, gust, '-r', label='gust speed')  # solid red line
plt.plot(p, wind_ave, '-b', label='ave speed')  # solid blue line
plt.legend(loc='upper right')
plt.xlabel('time (10 min intervals)')
plt.ylabel('windspeed (kt)')
plt.title('Landguard windspeed on %s' % date)
plt.grid(True)
# To keep a copy of the figure, call plt.savefig("windspeed.png") here,
# before plt.show().
plt.show()
# Polar scatter plot: angle = wind direction, radius = average wind speed.

# Conversion factor from degrees to radians.
rad_per_deg = 2 * np.pi / 360
direction = np.asarray(direction)
theta = direction * rad_per_deg

ax = plt.subplot(1, 1, 1, polar=True)
# Compass layout: 0 degrees at the top (north), angles increasing clockwise.
ax.set_theta_zero_location('N')
ax.set_theta_direction(-1)
ax.scatter(theta, wind_ave, color='r', linewidth=3)
# Fixed radial scale of 20 (same unit as wind_ave).
ax.set_rmax(20.0)
ax.grid(True)
ax.set_title("wind direction on a polar axis on %s" % date, va='bottom')
plt.show()
Comments