In [1]:
import requests
from bs4 import BeautifulSoup
In [2]:
# soup = BeautifulSoup(open("/home/al/projects/webscraping/venv/weather_app/Timmins.html"), "html.parser")
url = 'https://weather.gc.ca/city/pages/on-127_metric_e.html'
page = requests.get(url)
In [3]:
soup = BeautifulSoup(page.content, 'html.parser')
In [4]:
#list(soup.children)
In [5]:
[type(item) for item in list(soup.children)]
Out[5]:
[bs4.element.Doctype,
 bs4.element.NavigableString,
 bs4.element.Comment,
 bs4.element.NavigableString,
 bs4.element.Comment,
 bs4.element.NavigableString,
 bs4.element.Tag,
 bs4.element.NavigableString]
In [6]:
html = list(soup.children)[6]
In [7]:
len(list(html.children))
Out[7]:
6

Start of Work

1 Get City Name

Timmins, ON - 10:20 AM EDT Thursday 18 April 2019

Temp - 2C

Condition - Light Rain

Pressure - Rising

Humidity - 99%

Wind - N 18 km/h

Visibility - 24km

Tonight: Periods of rain changing to periods of light snow this evening. Wind north 20 km/h gusting to 40 becoming light late this evening. Low minus 2. Wind chill minus 7 overnight.

Fri, 19 Apr: Clearing in the morning. High 9. UV index 6 or high.

Night: Partly cloudy. Becoming clear near midnight. Wind up to 15 km/h. Low minus 4. Wind chill minus 5 overnight.

In [8]:
city_find = soup.find_all('h1')[0]
city = city_find.get_text()
print(city)
Timmins, ON

2 Get the current date

In [9]:
date_whole_div = soup.find_all('div', class_="col-sm-10 text-center")
In [10]:
todays_date_in_div = soup.find_all('dd', class_="mrgn-bttm-0")[1]
print(todays_date_in_div)
<dd class="mrgn-bttm-0">7:00 AM EDT Monday 22 April 2019</dd>
In [11]:
todays_date = todays_date_in_div.get_text()
todays_date
Out[11]:
'7:00 AM EDT Monday 22 April 2019'
In [12]:
city
soup.find_all('h1')[0].get_text()
Out[12]:
'Timmins, ON\n'
In [13]:
print('{}{}'.format(city, todays_date))
Timmins, ON
7:00 AM EDT Monday 22 April 2019

3 Get the Days of the week

In [14]:
div_tables = soup.find_all('div', class_='div-table')
#div_tables
In [15]:
today_is_html = soup.find_all('strong')[0]
tomorrow_is_html = soup.find_all('strong')[1]
In [16]:
print(today_is_html)
<strong title="Monday">Mon</strong>
In [17]:
print(tomorrow_is_html)
<strong title="Tuesday">Tue</strong>
In [18]:
today_is = today_is_html.get_text()
tomorrow_is = tomorrow_is_html.get_text()
print('{} and {}'.format(today_is, tomorrow_is))
Mon and Tue

Messing around with stuff

testing = soup.findall('div', class='div-row dir-row1 div-row-head') testing

In [19]:
i = 0


list = soup.find_all('strong')


# print(list[10].get_text())
list
Out[19]:
[<strong title="Monday">Mon</strong>,
 <strong title="Tuesday">Tue</strong>,
 <strong title="Wednesday">Wed</strong>,
 <strong title="Thursday">Thu</strong>,
 <strong title="Friday">Fri</strong>,
 <strong title="Saturday">Sat</strong>,
 <strong title="Sunday">Sun</strong>,
 <strong title="Monday">Today</strong>,
 <strong title="Monday night">Tonight</strong>,
 <strong><abbr title="Tuesday">Tue</abbr>, 23 <abbr title="April">Apr</abbr></strong>,
 <strong><abbr title="Wednesday">Wed</abbr>, 24 <abbr title="April">Apr</abbr></strong>,
 <strong><abbr title="Thursday">Thu</abbr>, 25 <abbr title="April">Apr</abbr></strong>,
 <strong><abbr title="Friday">Fri</abbr>, 26 <abbr title="April">Apr</abbr></strong>,
 <strong><abbr title="Saturday">Sat</abbr>, 27 <abbr title="April">Apr</abbr></strong>,
 <strong><abbr title="Sunday">Sun</abbr>, 28 <abbr title="April">Apr</abbr></strong>,
 <strong title="Monday">Today</strong>,
 <strong title="Monday night">Tonight</strong>,
 <strong><abbr title="Tuesday">Tue</abbr>, 23 <abbr title="April">Apr</abbr></strong>,
 <strong> Night: </strong>,
 <strong><abbr title="Wednesday">Wed</abbr>, 24 <abbr title="April">Apr</abbr></strong>,
 <strong> Night: </strong>,
 <strong><abbr title="Thursday">Thu</abbr>, 25 <abbr title="April">Apr</abbr></strong>,
 <strong> Night: </strong>,
 <strong><abbr title="Friday">Fri</abbr>, 26 <abbr title="April">Apr</abbr></strong>,
 <strong> Night: </strong>,
 <strong><abbr title="Saturday">Sat</abbr>, 27 <abbr title="April">Apr</abbr></strong>,
 <strong> Night: </strong>,
 <strong><abbr title="Sunday">Sun</abbr>, 28 <abbr title="April">Apr</abbr></strong>,
 <strong>Warning:</strong>]

Get the Temperature today

In [20]:
find_temp = soup.find_all('span', class_='wxo-metric-hide')[0]
print(find_temp.get_text())
-2°C

Get the current conditions

In [21]:
find_conditions_all = soup.find_all('dd', class_='mrgn-bttm-0')
conditions = find_conditions_all[2]
conditions.get_text()
Out[21]:
'Light Freezing Rain'

Get the barometric pressure and trend

In [22]:
barometric_pressure_dd = find_conditions_all[3]
barometric_pressure = barometric_pressure_dd.get_text()
barometric_pressure
#score, seperator, subject = title.partition('::')
beg, sep, end = barometric_pressure.partition("a")
beg
                                              
Out[22]:
'102.8 kP'
In [23]:
tendency_dd = find_conditions_all[5]
tendency = tendency_dd.get_text()
tendency
Out[23]:
'Rising'
In [24]:
print('Barometric Pressure is {}a and {}'.format(beg, tendency))
Barometric Pressure is 102.8 kPa and Rising

Get the current humidity and wind speed

In [25]:
humidity_dd = find_conditions_all[10]
humidity = humidity_dd.get_text()
print(humidity)
88%
In [26]:
wind_dd = find_conditions_all[11]
wind = wind_dd.get_text()
wind.strip('\n')
Out[26]:
'E 13 km/h'
In [27]:
p = find_conditions_all[3].get_text().replace('\n ','')
print('{} sdafj'.format(p))
102.8 kPa
 sdafj

Get extended text forecast details

In [28]:
extended_forecast_top_level = soup.find_all('table', class_='table mrgn-bttm-md mrgn-tp-sm textforecast')
len(extended_forecast_top_level)
Out[28]:
1
In [29]:
extended_field0 = soup.select('tbody')
extended_field_td = soup.select('td')
extended_field_td
Out[29]:
[<td><a href="/forecast/hourly/on-127_metric_e.html"><strong title="Monday">Today</strong></a></td>,
 <td>Periods of freezing rain mixed with snow changing to rain near noon. Rainfall amount 5 mm. High plus 2. UV index 2 or low.</td>,
 <td><strong title="Monday night">Tonight</strong></td>,
 <td>Rain or snow. Rainfall amount 15 to 25 mm except locally snowfall amount 10 cm over northern sections. Low plus 1.</td>,
 <td class="uniform_width"><strong><abbr title="Tuesday">Tue</abbr>, 23 <abbr title="April">Apr</abbr></strong></td>,
 <td>Rain or snow changing to snow in the afternoon. Rainfall amount 15 to 25 mm. Snowfall amount 2 cm except locally 10 cm over northern sections. Wind becoming north 20 km/h in the afternoon. Temperature steady near zero. UV index 2 or low.</td>,
 <td class="uniform_width" title="Tuesday night"> Night</td>,
 <td>Periods of rain or snow. Low zero.</td>,
 <td class="uniform_width"><strong><abbr title="Wednesday">Wed</abbr>, 24 <abbr title="April">Apr</abbr></strong></td>,
 <td>A mix of sun and cloud with 70 percent chance of flurries. High 12.</td>,
 <td class="uniform_width" title="Wednesday night"> Night</td>,
 <td>Cloudy with 60 percent chance of showers. Low plus 3.</td>,
 <td class="uniform_width"><strong><abbr title="Thursday">Thu</abbr>, 25 <abbr title="April">Apr</abbr></strong></td>,
 <td>Periods of rain. High 11.</td>,
 <td class="uniform_width" title="Thursday night"> Night</td>,
 <td>Cloudy with 70 percent chance of showers. Low plus 3.</td>,
 <td class="uniform_width"><strong><abbr title="Friday">Fri</abbr>, 26 <abbr title="April">Apr</abbr></strong></td>,
 <td>Rain. High 8.</td>,
 <td class="uniform_width" title="Friday night"> Night</td>,
 <td>Periods of rain or snow. Low minus 6.</td>,
 <td class="uniform_width"><strong><abbr title="Saturday">Sat</abbr>, 27 <abbr title="April">Apr</abbr></strong></td>,
 <td>A mix of sun and cloud with 30 percent chance of flurries. High 6.</td>,
 <td class="uniform_width" title="Saturday night"> Night</td>,
 <td>Cloudy periods with 30 percent chance of flurries. Low minus 6.</td>,
 <td class="uniform_width"><strong><abbr title="Sunday">Sun</abbr>, 28 <abbr title="April">Apr</abbr></strong></td>,
 <td>A mix of sun and cloud with 30 percent chance of flurries or rain showers. High plus 4.</td>,
 <td>
 <abbr title="Maximum">Max</abbr> <span class="wxo-metric-hide" title="max">11°<abbr title="Celsius">C</abbr>.</span><span class="wxo-imperial-hide wxo-city-hidden" title="max">52°<abbr title="Fahrenheit">F</abbr>.</span> <abbr title="Minimum">Min</abbr> <span class="wxo-metric-hide" title="min">-2°<abbr title="Celsius">C</abbr>.</span><span class="wxo-imperial-hide wxo-city-hidden" title="min">28°<abbr title="Fahrenheit">F</abbr>.</span>
 </td>,
 <td>6:23 EDT</td>,
 <td>20:25 EDT</td>]
In [30]:
# Sunrise and sunset
sunrise = soup.select('td')[25].get_text()
sunset = soup.select('td')[26].get_text()
print('Sunrise: {}  Sunset: {}'.format(sunrise, sunset))
Sunrise: A mix of sun and cloud with 30 percent chance of flurries or rain showers. High plus 4.  Sunset: 
Max 11°C.52°F. Min -2°C.28°F.

In [31]:
# Sunsets
sunset = soup.select('td')[26].get_text()
print('Sunset: {}'.format(sunset))
Sunset: 
Max 11°C.52°F. Min -2°C.28°F.

In [32]:
# use this convention for the following field0_td , field1_td
soup.select('td')[0].get_text()
Out[32]:
'Today'
In [33]:
soup.select('td')[1].get_text()
Out[33]:
'Periods of freezing rain mixed with snow changing to rain near noon. Rainfall amount 5 mm. High plus 2. UV index 2 or low.'
In [34]:
soup.select('td')[2].get_text()
Out[34]:
'Tonight'
In [35]:
soup.select('td')[3].get_text()
Out[35]:
'Rain or snow. Rainfall amount 15 to 25 mm except locally snowfall amount 10 cm over northern sections. Low plus 1.'
In [36]:
soup.select('td')[4].get_text().strip(' ')
Out[36]:
'Tue,\xa023\xa0Apr'
In [37]:
soup.select('td')[5].get_text()
Out[37]:
'Rain or snow changing to snow in the afternoon. Rainfall amount 15 to 25 mm. Snowfall amount 2 cm except locally 10 cm over northern sections. Wind becoming north 20 km/h in the afternoon. Temperature steady near zero. UV index 2 or low.'
In [38]:
soup.select('td')[6].get_text().strip(' ')
Out[38]:
'Night'
In [39]:
soup.select('td')[7].get_text()
Out[39]:
'Periods of rain or snow. Low zero.'
In [40]:
soup.select('td')[8].get_text()
Out[40]:
'Wed,\xa024\xa0Apr'
In [41]:
soup.select('td')[9].get_text()
Out[41]:
'A mix of sun and cloud with 70 percent chance of flurries. High 12.'
In [42]:
print(soup.select('td')[10].get_text())
 Night
In [43]:
soup.select('td')[11].get_text()
Out[43]:
'Cloudy with 60 percent chance of showers. Low plus 3.'
In [44]:
soup.select('td')[12].get_text()
Out[44]:
'Thu,\xa025\xa0Apr'
In [45]:
soup.select('td')[13].get_text()
Out[45]:
'Periods of rain. High 11.'
In [46]:
soup.select('td')[14].get_text().strip(' ')
Out[46]:
'Night'
In [47]:
soup.select('td')[15].get_text()
Out[47]:
'Cloudy with 70 percent chance of showers. Low plus 3.'
In [48]:
soup.select('td')[16].get_text()
Out[48]:
'Fri,\xa026\xa0Apr'
In [49]:
soup.select('td')[17].get_text()
Out[49]:
'Rain. High 8.'
In [50]:
soup.select('td')[18].get_text().strip(' ')
Out[50]:
'Night'
In [51]:
soup.select('td')[19].get_text()
Out[51]:
'Periods of rain or snow. Low minus 6.'
In [52]:
soup.select('td')[20].get_text()
Out[52]:
'Sat,\xa027\xa0Apr'
In [53]:
soup.select('td')[21].get_text()
Out[53]:
'A mix of sun and cloud with 30 percent chance of flurries. High 6.'
In [54]:
soup.select('td')[22].get_text().strip(' ')
Out[54]:
'Night'
In [55]:
soup.select('td')[23].get_text()
Out[55]:
'Cloudy periods with 30 percent chance of flurries. Low minus 6.'
In [56]:
soup.select('td')[24].get_text()
Out[56]:
'Sun,\xa028\xa0Apr'
In [57]:
soup.select('td')[26]
Out[57]:
<td>
<abbr title="Maximum">Max</abbr> <span class="wxo-metric-hide" title="max">11°<abbr title="Celsius">C</abbr>.</span><span class="wxo-imperial-hide wxo-city-hidden" title="max">52°<abbr title="Fahrenheit">F</abbr>.</span> <abbr title="Minimum">Min</abbr> <span class="wxo-metric-hide" title="min">-2°<abbr title="Celsius">C</abbr>.</span><span class="wxo-imperial-hide wxo-city-hidden" title="min">28°<abbr title="Fahrenheit">F</abbr>.</span>
</td>
In [58]:
table_wxo_normal = soup.find_all('table', class_='table wxo-normals')

Get the Normals for this time of year

In [59]:
min_max_field = soup.select('td span')
min_max_field
Out[59]:
[<span class="wxo-metric-hide" title="max">11°<abbr title="Celsius">C</abbr>.</span>,
 <span class="wxo-imperial-hide wxo-city-hidden" title="max">52°<abbr title="Fahrenheit">F</abbr>.</span>,
 <span class="wxo-metric-hide" title="min">-2°<abbr title="Celsius">C</abbr>.</span>,
 <span class="wxo-imperial-hide wxo-city-hidden" title="min">28°<abbr title="Fahrenheit">F</abbr>.</span>]
In [60]:
max = min_max_field[0]
min = min_max_field[2]
In [61]:
normals_min_max = max.get_text().strip('.'), min.get_text().strip('.')
In [62]:
print('Normals -- High: {}  Low: {}'.format(normals_min_max[0], normals_min_max[1]))
Normals -- High: 11°C  Low: -2°C
In [63]:
sun = soup.select('td')
In [64]:
sun.pop(-1)
Out[64]:
<td>20:25 EDT</td>
In [65]:
sun.pop(-1)
Out[65]:
<td>6:23 EDT</td>
In [66]:
sun.pop()
Out[66]:
<td>
<abbr title="Maximum">Max</abbr> <span class="wxo-metric-hide" title="max">11°<abbr title="Celsius">C</abbr>.</span><span class="wxo-imperial-hide wxo-city-hidden" title="max">52°<abbr title="Fahrenheit">F</abbr>.</span> <abbr title="Minimum">Min</abbr> <span class="wxo-metric-hide" title="min">-2°<abbr title="Celsius">C</abbr>.</span><span class="wxo-imperial-hide wxo-city-hidden" title="min">28°<abbr title="Fahrenheit">F</abbr>.</span>
</td>
In [67]:
sun
Out[67]:
[<td><a href="/forecast/hourly/on-127_metric_e.html"><strong title="Monday">Today</strong></a></td>,
 <td>Periods of freezing rain mixed with snow changing to rain near noon. Rainfall amount 5 mm. High plus 2. UV index 2 or low.</td>,
 <td><strong title="Monday night">Tonight</strong></td>,
 <td>Rain or snow. Rainfall amount 15 to 25 mm except locally snowfall amount 10 cm over northern sections. Low plus 1.</td>,
 <td class="uniform_width"><strong><abbr title="Tuesday">Tue</abbr>, 23 <abbr title="April">Apr</abbr></strong></td>,
 <td>Rain or snow changing to snow in the afternoon. Rainfall amount 15 to 25 mm. Snowfall amount 2 cm except locally 10 cm over northern sections. Wind becoming north 20 km/h in the afternoon. Temperature steady near zero. UV index 2 or low.</td>,
 <td class="uniform_width" title="Tuesday night"> Night</td>,
 <td>Periods of rain or snow. Low zero.</td>,
 <td class="uniform_width"><strong><abbr title="Wednesday">Wed</abbr>, 24 <abbr title="April">Apr</abbr></strong></td>,
 <td>A mix of sun and cloud with 70 percent chance of flurries. High 12.</td>,
 <td class="uniform_width" title="Wednesday night"> Night</td>,
 <td>Cloudy with 60 percent chance of showers. Low plus 3.</td>,
 <td class="uniform_width"><strong><abbr title="Thursday">Thu</abbr>, 25 <abbr title="April">Apr</abbr></strong></td>,
 <td>Periods of rain. High 11.</td>,
 <td class="uniform_width" title="Thursday night"> Night</td>,
 <td>Cloudy with 70 percent chance of showers. Low plus 3.</td>,
 <td class="uniform_width"><strong><abbr title="Friday">Fri</abbr>, 26 <abbr title="April">Apr</abbr></strong></td>,
 <td>Rain. High 8.</td>,
 <td class="uniform_width" title="Friday night"> Night</td>,
 <td>Periods of rain or snow. Low minus 6.</td>,
 <td class="uniform_width"><strong><abbr title="Saturday">Sat</abbr>, 27 <abbr title="April">Apr</abbr></strong></td>,
 <td>A mix of sun and cloud with 30 percent chance of flurries. High 6.</td>,
 <td class="uniform_width" title="Saturday night"> Night</td>,
 <td>Cloudy periods with 30 percent chance of flurries. Low minus 6.</td>,
 <td class="uniform_width"><strong><abbr title="Sunday">Sun</abbr>, 28 <abbr title="April">Apr</abbr></strong></td>,
 <td>A mix of sun and cloud with 30 percent chance of flurries or rain showers. High plus 4.</td>]
In [68]:
soup.select('div h2')[4].get_text() == 'No Alerts in effect'
Out[68]:
False
In [81]:
soup.select('a div')
Out[81]:
[<div class="col-xs-1"><span class="glyphicon glyphicon-warning-sign"></span></div>,
 <div class="col-xs-10 text-center">RAINFALL WARNING </div>,
 <div class="col-xs-1 text-right"><span class="glyphicon glyphicon-play"></span></div>,
 <div class="col-xs-1"><span class="glyphicon glyphicon-warning-sign"></span></div>,
 <div class="col-xs-10 text-center">FREEZING RAIN WARNING </div>,
 <div class="col-xs-1 text-right"><span class="glyphicon glyphicon-play"></span></div>,
 <div class="div-row div-row2 div-row-data">
 <img alt="Periods of freezing rain or rain" class="center-block" height="51" src="/weathericons/14.gif" width="60"/><p class="mrgn-bttm-0"><span class="high wxo-metric-hide" title="max">2°<abbr title="Celsius">C</abbr></span><span class="high wxo-imperial-hide wxo-city-hidden" title="max">36°<abbr title="Fahrenheit">F</abbr></span></p>
 <p class="mrgn-bttm-0 pop text-center"> </p>
 <p class="mrgn-bttm-0">Periods of freezing rain or rain</p>
 </div>,
 <div class="col-xs-1"><span class="glyphicon glyphicon-warning-sign"></span></div>,
 <div class="col-xs-10 text-center">RAINFALL WARNING </div>,
 <div class="col-xs-1 text-right"><span class="glyphicon glyphicon-play"></span></div>,
 <div class="col-xs-1"><span class="glyphicon glyphicon-warning-sign"></span></div>,
 <div class="col-xs-10 text-center">FREEZING RAIN WARNING </div>,
 <div class="col-xs-1 text-right"><span class="glyphicon glyphicon-play"></span></div>,
 <div class="col-xs-1"><span class="glyphicon glyphicon-warning-sign"></span></div>,
 <div class="col-xs-10 text-center">RAINFALL WARNING </div>,
 <div class="col-xs-1 text-right"><span class="glyphicon glyphicon-play"></span></div>,
 <div class="col-xs-1"><span class="glyphicon glyphicon-warning-sign"></span></div>,
 <div class="col-xs-10 text-center">FREEZING RAIN WARNING </div>,
 <div class="col-xs-1 text-right"><span class="glyphicon glyphicon-play"></span></div>,
 <div class="col-xs-6"><img alt="Periods of freezing rain or rain" class="pull-right" height="51" src="/weathericons/14.gif" width="60"/></div>,
 <div class="col-xs-6">
 <strong title="Monday">Today</strong><br/><span class="high wxo-metric-hide" title="max">2°<abbr title="Celsius">C</abbr></span><span class="high wxo-imperial-hide wxo-city-hidden" title="max">36°<abbr title="Fahrenheit">F</abbr></span>
 </div>,
 <div class="col-xs-12 brdr-bttm mrgn-bttm-md">Periods of freezing rain mixed with snow changing to rain near noon. Rainfall amount 5 mm. High plus 2. UV index 2 or low.</div>]
In [98]:
x = soup.select('a div')
warnings = soup.find_all('div', class_='col-xs-10 text-center')
In [99]:
warnings
Out[99]:
[<div class="col-xs-10 text-center">RAINFALL WARNING </div>,
 <div class="col-xs-10 text-center">FREEZING RAIN WARNING </div>,
 <div class="col-xs-10 text-center">RAINFALL WARNING </div>,
 <div class="col-xs-10 text-center">FREEZING RAIN WARNING </div>,
 <div class="col-xs-10 text-center">RAINFALL WARNING </div>,
 <div class="col-xs-10 text-center">FREEZING RAIN WARNING </div>]
In [100]:
alert1 = warnings.pop()
alert1.get_text().strip(' ')
Out[100]:
'FREEZING RAIN WARNING'
In [101]:
alert2 = warnings.pop()
alert2.get_text().strip(' ')
Out[101]:
'RAINFALL WARNING'
In [ ]: