-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtrackdays.py
102 lines (88 loc) · 2.18 KB
/
trackdays.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import bs4
from common import *
from dateutil import parser
import re
import requests
def toKind(desc):
    """Classify an event description into one of the ``Kind`` constants.

    The description is matched case-insensitively against a fixed list of
    keywords; the first keyword found decides the result, and
    ``Kind.NORMAL`` is returned when none of them occur.
    """
    text = desc.lower()
    # Ordered by priority: earlier entries win when several keywords match.
    # Attribute names are resolved lazily so only the matched one is read.
    keyword_to_kind_name = (
        ('evening', 'EVENING'),
        ('novice', 'NOVICE'),
        ('masterclass', 'TRAINING'),
        ('open pitlane', 'OPL'),
        ('road bikes only', 'RBO'),
    )
    for keyword, kind_name in keyword_to_kind_name:
        if keyword in text:
            return getattr(Kind, kind_name)
    return Kind.NORMAL
def cleanCircuit(circuit, subcircuit):
    """Build a display name for a track from its circuit and layout names.

    The two parts are joined with a single space, and a trailing
    " full circuit" or " circuit" (matched case-insensitively) is dropped.

    Surrounding whitespace on either part is ignored, so an empty
    ``subcircuit`` no longer leaves a trailing space on the result and
    stray whitespace cannot prevent the suffix from being recognised.

    Args:
        circuit: Name of the venue, e.g. ``'Brands Hatch'``.
        subcircuit: Name of the layout, e.g. ``'Indy Circuit'``; may be
            empty.

    Returns:
        The cleaned track name as a string.
    """
    name = (circuit.strip() + ' ' + subcircuit.strip()).strip()
    lowered = name.lower()
    # Check the longer suffix first so "full" is removed along with it.
    for suffix in (' full circuit', ' circuit'):
        if lowered.endswith(suffix):
            return name[:-len(suffix)].rstrip()
    return name
def inUK(circuit):
    """Return True when ``circuit`` starts with a known UK venue name.

    The comparison is case-insensitive and prefix-based, so layout
    suffixes after the venue name do not matter.
    """
    uk_venues = (
        'rockingham',
        'bedford',
        'snetterton',
        'donington',
        'silverstone',
        'brands hatch',
        'pembrey',
        'castle combe',
        'cadwell park',
        'croft',
        'anglesey',
        'mallory park',
        'lydden hill',
        'oulton park',
        'blyton park',
    )
    # startswith accepts a tuple and succeeds if any prefix matches.
    return circuit.lower().startswith(uk_venues)
def parse(elem):
    """Turn one calendar row element into an event dict, or None to skip it.

    The row's ``label`` elements are read positionally: labels 1-5 give
    the date, circuit, subcircuit and two description fields, and the
    last label gives the price. Rows with fewer than seven labels, the
    header row, and circuits rejected by ``inUK`` all yield None.

    Returns:
        A dict with the keys ``company``, ``date``, ``track``, ``kind``,
        ``desc`` and ``url``, or None when the row is skipped.
    """
    cells = elem.find_all('label')
    if len(cells) < 7:
        return None

    try:
        texts = [cell.text.encode('utf-8', 'ignore') for cell in cells[1:6]]
        date, circuit, subcircuit, desc1, desc2 = texts
        price = cells[-1].text.encode('utf-8', 'ignore')
    except ValueError as err:
        print(err)
        print('skipping {}'.format(cells))
        return None

    # TODO: use avail

    # The header row has the literal text 'Track' in the date position.
    if date == 'Track':
        return None

    if not inUK(circuit):
        print('Rejecting track ' + circuit)
        return None

    # dayfirst=True: ambiguous dates are read as day/month, not month/day.
    date = parser.parse(date, dayfirst=True)
    kind = toKind(desc1)
    circuit = cleanCircuit(circuit, subcircuit)

    # Rows without a link get no URL; otherwise the href is site-relative.
    if elem.a:
        url = 'https://www.trackdays.co.uk' + elem.a['href']
    else:
        url = None

    event = {
        'company': Company.TRACKDAYS,
        'date': date,
        'track': circuit,
        'kind': kind,
        'desc': '{}; {}; {}'.format(desc1, desc2, price),
        'url': url,
    }
    return event
def scrape():
    """Return the list of events parsed from the bike trackday calendar.

    Every element with class ``row`` in the fetched document is passed to
    ``parse``; rows that ``parse`` rejects (falsy results) are dropped.
    """
    # NOTE(review): fetch_soup is defined elsewhere; presumably the first
    # argument names a local copy of the page -- confirm.
    document = fetch_soup('trackdays.html',
                          'https://www.trackdays.co.uk/calendar/bikes/')
    rows = document.find_all(class_='row')
    # Parse every row first, then discard the skipped ones.
    parsed = [parse(row) for row in rows]
    return [event for event in parsed if event]
# When run as a script, dump every scraped event, one per line.
if __name__ == '__main__':
    for event in scrape():
        print(event)