Skip to main content
Beautiful Soup HTML parsing

The following Python code fetches the windspeed web page, extracts the timestamp, average windspeed, direction and gust speed of each sample, and writes the data to a date-stamped file named, for example, /home/user/wind_data/windspeed_2015-04-21-12.txt. Schedule a cron job to run it every day, say at midnight. The windspeed file for a particular day can then be selected and processed by graph.py.


#!/usr/bin/python

import os
import requests
import time
from bs4 import BeautifulSoup

# Address of the windspeed page (placeholder host, substitute the real one).
WIND_PAGE_URL = "http://xxxxx.wwww.yyyyy"


def fetch_wind_cells(url, timeout=30):
    """Return the text of every <td> cell inside the page's "grid" table.

    url     -- address of the windspeed page.
    timeout -- seconds to wait for the server before giving up.

    The cells are returned in the order the rows and cells are found.
    Raises the requests exception for a failed or non-2xx response, and
    ValueError when the page contains no table with id "grid".
    """
    # A timeout keeps an unattended (cron) run from hanging forever, and
    # raise_for_status stops an error page from being parsed as data.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()

    # Name the parser explicitly so the result does not depend on which
    # optional parser libraries happen to be installed.
    soup = BeautifulSoup(response.content, "html.parser")
    table = soup.find("table", {"id": "grid"})
    if table is None:
        raise ValueError('no table with id "grid" found at %s' % url)

    cells = []
    for row in table.find_all('tr'):
        for cell in row.find_all('td'):
            cells.append(cell.get_text())
    return cells


def write_cells(cells, path):
    """Write each cell text on its own line to *path*.

    The parent directory is created when it does not exist yet. The file
    is closed even if a write fails.
    """
    directory = os.path.dirname(path)
    if directory and not os.path.isdir(directory):
        os.makedirs(directory)
    with open(path, 'w') as out:
        for text in cells:
            out.write("%s\n" % text)


def main():
    """Scrape the windspeed page into ~/wind_data/windspeed_<stamp>.txt."""
    # Stamp is year-month-day-hour in local time, e.g. 2015-04-21-12.
    date_stamp = time.strftime('%Y-%m-%d-%H')
    outfile = os.path.join(os.path.expanduser('~'), 'wind_data',
                           "windspeed_%s.txt" % date_stamp)
    # Fetch first: the output file is only created or overwritten once
    # the data has actually been retrieved.
    cells = fetch_wind_cells(WIND_PAGE_URL)
    write_cells(cells, outfile)


if __name__ == "__main__":
    main()


The following Python program plots graphs of the data in the windspeed text file.

#!/usr/bin/python

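# This program asks for the date reference of the file created by the
# scraping program hha.py. That program stores the scraped data in a
# file named windspeed_<stamp>.txt; the scraper above stamps the name
# with year-month-day-hour, e.g. windspeed_2015-04-21-12.txt, so enter
# the stamp exactly as it appears in the file name.
# The scraped data has one value per line, four lines per sample:
# date time \n ave windspeed \n wind direction \n gust speed \n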

# 21/04/15 22:10
# 7.19kt
# 40.10deg
# 11.46kt
# 21/04/15 22:00
# 5.44kt
# 32.70deg
# 10.88kt
# 21/04/15 21:50
# 6.41kt
# 40.40deg
# 10.88kt


import numpy as np
import matplotlib.pyplot as plt

# Times of day that get a visible tick label on the speed graph.
LABELLED_TIMES = ('00:00', '03:00', '06:00', '09:00', '12:00',
                  '15:00', '18:00', '21:00', '24:00')


def _number_before_unit(text, unit):
    """Return the float in *text* after dropping a trailing *unit* suffix."""
    text = text.strip()
    if text.endswith(unit):
        text = text[:-len(unit)]
    return float(text)


def parse_wind_records(lines):
    """Split the scraped lines into four series in chronological order.

    lines -- the lines of a windspeed file: groups of four holding
             "dd/mm/yy hh:mm", "<speed>kt", "<direction>deg" and
             "<gust>kt", newest sample first.

    Returns (timestr, wind_ave, direction, gust): the "hh:mm" strings and
    three lists of floats. Blank lines and a trailing incomplete group
    are ignored. Raises ValueError if a value is not a number.
    """
    rows = [line.strip() for line in lines]
    rows = [row for row in rows if row]
    complete = len(rows) - len(rows) % 4    # whole groups of four only

    timestr = []
    wind_ave = []
    direction = []
    gust = []
    for start in range(0, complete, 4):
        # Last whitespace-separated field of the stamp is the hh:mm time.
        timestr.append(rows[start].split()[-1])
        wind_ave.append(_number_before_unit(rows[start + 1], 'kt'))
        direction.append(_number_before_unit(rows[start + 2], 'deg'))
        gust.append(_number_before_unit(rows[start + 3], 'kt'))

    # The file lists the newest sample first. All four series are reversed
    # together so that index i refers to the same sample in each of them;
    # reversing only the speeds would pair every direction with the speed
    # of a different sample on the polar plot.
    return timestr[::-1], wind_ave[::-1], direction[::-1], gust[::-1]


def plot_speeds(timestr, wind_ave, gust, date):
    """Show average and gust speed against sample number."""
    positions = list(range(len(timestr)))
    # Only the three-hourly times are labelled; other ticks stay blank.
    labels = [t if t in LABELLED_TIMES else ' ' for t in timestr]
    plt.xticks(positions, labels)

    plt.plot(positions, gust, '-r', label = 'gust speed')    # solid red line
    plt.plot(positions, wind_ave, '-b', label = 'ave speed')    # solid blue line
    plt.legend(loc='upper right')

    plt.xlabel('time (10 min intervals)')
    plt.ylabel('windspeed (kt)')
    plt.title('Landguard windspeed on %s'%date)
    plt.grid(True)
    # plt.savefig("windspeed.png")    # uncomment to save the figure
    plt.show()


def plot_direction(direction, wind_ave, date):
    """Show average speed against wind direction on a polar axis."""
    degrees_to_radians = 2 * np.pi/360
    theta = degrees_to_radians * np.asarray(direction)

    ax = plt.subplot(111, polar=True)
    ax.set_theta_zero_location('N')    # 0 degrees at the top
    ax.set_theta_direction(-1)         # angles increase clockwise
    ax.scatter(theta, wind_ave, color='r', linewidth=3)
    ax.set_rmax(20.0)
    ax.grid(True)

    ax.set_title("wind direction on a polar axis on %s"%date, va='bottom')
    plt.show()


def main():
    """Ask for the date reference, read windspeed_<date>.txt and plot it."""
    try:
        ask = raw_input    # Python 2
    except NameError:
        ask = input        # Python 3
    date = ask("Enter date as yyyy-mm-dd ")
    filename = 'windspeed'+'_'+ date

    with open('%s.txt' % filename,'r') as handle:
        lines = handle.readlines()

    timestr, wind_ave, direction, gust = parse_wind_records(lines)
    if not timestr:
        raise SystemExit('no complete records found in %s.txt' % filename)

    plot_speeds(timestr, wind_ave, gust, date)
    plot_direction(direction, wind_ave, date)


if __name__ == "__main__":
    main()

Comments