summaryrefslogtreecommitdiff
path: root/extract.rb
diff options
context:
space:
mode:
Diffstat (limited to 'extract.rb')
-rw-r--r--extract.rb158
1 files changed, 158 insertions, 0 deletions
diff --git a/extract.rb b/extract.rb
new file mode 100644
index 0000000..10daf66
--- /dev/null
+++ b/extract.rb
@@ -0,0 +1,158 @@
1require 'httparty'
2require 'nokogiri'
3require 'date'
4
# Custom HTTParty parser that turns HTML response bodies into Nokogiri
# documents.
#
# NOTE(review): `body` comes from HTTParty::Parser, and HTTParty is presumed to
# invoke `html` for HTML responses -- neither is visible in this file.
class NokogiriParser < HTTParty::Parser
  # Returns the response body parsed as an HTML document.
  def html
    Nokogiri::HTML.parse(body)
  end
end
10
# HTTParty client for the Holiday Valley website. Request paths are resolved
# against the base URI and responses are parsed with NokogiriParser.
class HVPage
  include HTTParty

  base_uri("http://www.holidayvalley.com")
  parser(NokogiriParser)
end
16
# Fetch the snow report once; the extraction code below queries this response.
snow_report_path = "/HolidayValley/snowreport.aspx"
page = HVPage.get(snow_report_path)
18
19
# Raised when a scraped table row does not have the expected number of cells.
#
# Derives from StandardError (not Exception) so that ordinary `rescue` /
# `rescue => e` clauses can catch it; `rescue InvalidData` keeps working.
class InvalidData < StandardError
end
22
# Stateless converters that turn the raw text of scraped cells into Ruby
# values. Every converter is a module-level method, so it can be called
# directly (Transforms.to_int("3")) or passed around as a Method object
# (Transforms.method(:to_int)).
module Transforms
  # strftime layout shared by parse_date and parse_date_time.
  TIMESTAMP_FORMAT = "%a, %d %b %Y %H:%M:%S %Z".freeze

  # Lift label layout: a name followed by "(<number>')".
  LIFT_NAME_PATTERN = /([^(]+)\(([^']*)'\)/

  # Splits a "low-high" string on "-" and returns the parts as Integers.
  def self.parse_range(value)
    value.split("-").map(&:to_i)
  end

  # Returns the leading integer of +value+ (0 when there is none).
  def self.to_int(value)
    value.to_i
  end

  # True when +value+ is "yes", ignoring case and surrounding whitespace.
  def self.is_true(value)
    keyword?(value, "yes")
  end

  # Parses an "MM/DD/YYYY" date and re-renders it using TIMESTAMP_FORMAT.
  # Raises whatever Date.strptime raises when +value+ does not match.
  def self.parse_date(value)
    Date.strptime(value, "%m/%d/%Y").strftime(TIMESTAMP_FORMAT)
  end

  # Parses a free-form timestamp and re-renders it using TIMESTAMP_FORMAT.
  # Raises whatever DateTime.parse raises when +value+ is not a timestamp.
  def self.parse_date_time(value)
    DateTime.parse(value).strftime(TIMESTAMP_FORMAT)
  end

  # True when +value+ is "open", ignoring case and surrounding whitespace.
  def self.parse_open(value)
    keyword?(value, "open")
  end

  # True when +value+ is "groomed", ignoring case and surrounding whitespace.
  def self.parse_groomed(value)
    keyword?(value, "groomed")
  end

  # True when +value+ is "new", ignoring case and surrounding whitespace.
  def self.parse_snowmaking(value)
    keyword?(value, "new")
  end

  # Maps a difficulty icon URL to its label based on the icon file name.
  # Matching is case-sensitive. Returns nil when no known icon name appears.
  def self.parse_difficulty(url)
    case url
    when /green\.gif/       then "Easier"
    when /blue\.gif/        then "Intermediate"
    when /black\.gif/       then "Advanced"
    when /doubleBlack\.gif/ then "Expert"
    when /freestyle\.gif/   then "Freestyle"
    end
  end

  # Splits a lift label such as "Name (700')" into [name, number].
  # When the label has no "(<number>')" suffix, the stripped label is returned
  # with a nil number instead of failing on the missing match.
  def self.parse_lift_name(value)
    match = LIFT_NAME_PATTERN.match(value)
    return [value.strip, nil] unless match

    [match[1].strip, match[2].to_i]
  end

  # Case-insensitive comparison that tolerates the padding and newlines that
  # commonly surround text taken from HTML cells.
  def self.keyword?(value, expected)
    value.strip.downcase == expected
  end
  private_class_method :keyword?
end
75
# Lookup table for the mountain report fields.
#
# Each entry maps a field name to [xpath, finder, transform]:
#   xpath     - expression selecting the candidate text nodes
#   finder    - Symbol sent to the node set (:first / :last) or an Integer index
#   transform - callable applied to the chosen node's text
#
# The table is frozen so it cannot be modified by accident at runtime.
MTN_REPORT_XPATH = {
  last_updated:       ["//table[1]/tr[1]/td[2]/text()",  :last,  Transforms.method(:parse_date_time)],
  report_for:         ["//table[1]/tr[2]/td[2]/text()",  1,      Transforms.method(:parse_date)],
  snowfall_24hr:      ["//table[1]/tr[3]/td[2]/text()",  :first, Transforms.method(:to_int)],
  snowfall_48hr:      ["//table[1]/tr[4]/td[2]/text()",  :first, Transforms.method(:to_int)],
  snowfall_7day:      ["//table[1]/tr[5]/td[2]/text()",  :first, Transforms.method(:to_int)],
  snowfall_season:    ["//table[1]/tr[6]/td[2]/text()",  :first, Transforms.method(:to_int)],
  base_depth:         ["//table[1]/tr[7]/td[2]/text()",  :first, Transforms.method(:parse_range)],
  snowmaking_current: ["//table[1]/tr[8]/td[2]/text()",  :first, Transforms.method(:is_true)],
  snowmaking_24hours: ["//table[1]/tr[9]/td[2]/text()",  :first, Transforms.method(:is_true)],
  primary_surface:    ["//table[1]/tr[10]/td[2]/text()", :first, String.method(:new)],
  secondary_surface:  ["//table[1]/tr[11]/td[2]/text()", :first, String.method(:new)],
}.freeze
89
# Selects one node from the page and returns its transformed text.
#
# page      - object responding to #xpath
# xpath     - expression passed to page.xpath
# finder    - a Symbol is sent to the xpath result as a method name;
#             anything else is used as a subscript into the result
# transform - callable invoked with the selected node's #text
#
# Returns the value produced by transform.
def get_transformed_value(page, xpath, finder, transform)
  nodes = page.xpath(xpath)
  node =
    case finder
    when Symbol then nodes.send(finder)
    else nodes[finder]
    end
  transform.call(node.text)
end
95
# Extracts one trail from the given row of the second table on the page.
#
# page - object responding to #xpath
# row  - row number interpolated into the XPath expressions
#
# Returns a Hash with :trail_name, :difficulty, :open_day, :open_night,
# :groomed and :snow_making.
# Raises InvalidData when the row does not have exactly six child elements.
def get_trail_info(page, row)
  row_xpath = "//table[2]/tr[#{row}]"
  cells = page.xpath("#{row_xpath}/child::*").map(&:text)
  icon_src = page.xpath("#{row_xpath}/td/img/@src").to_s

  raise InvalidData, "Invalid data in row" unless cells.size == 6

  # The second cell's text is unused; difficulty is derived from the image src.
  name, _unused, day, night, groomed, snow_making = cells

  {
    trail_name: name,
    difficulty: Transforms.parse_difficulty(icon_src),
    open_day: Transforms.parse_open(day),
    open_night: Transforms.parse_open(night),
    groomed: Transforms.parse_groomed(groomed),
    snow_making: Transforms.parse_snowmaking(snow_making),
  }
end
113
# Extracts one lift from the given row of the third table on the page.
#
# page - object responding to #xpath
# row  - row number interpolated into the XPath expression
#
# Returns a Hash with :lift_name, :vertical, :open_day, :open_night and :notes.
# Raises InvalidData when the row does not have exactly four child elements.
def get_lift_info(page, row)
  cells = page.xpath("//table[3]/tr[#{row}]/child::*").map(&:text)

  raise InvalidData, "Invalid data in row" unless cells.size == 4

  label, day, night, notes = cells
  # The first cell carries both the lift name and the number in "(NNN')".
  name, vertical = Transforms.parse_lift_name(label)

  {
    lift_name: name,
    vertical: vertical,
    open_day: Transforms.parse_open(day),
    open_night: Transforms.parse_open(night),
    notes: notes,
  }
end
131
# Print every trail, starting at row 3 and stopping at the first row that
# get_trail_info rejects with InvalidData.
trail_row = 3
loop do
  begin
    puts get_trail_info(page, trail_row)
  rescue InvalidData
    break
  end
  trail_row += 1
end
141
# Resolve every field listed in MTN_REPORT_XPATH against the page and print
# the resulting hash.
report = MTN_REPORT_XPATH.each_with_object({}) do |(field, spec), values|
  values[field] = get_transformed_value(page, *spec)
end

puts report
149
# Print every lift, starting at row 2 and stopping at the first row that
# get_lift_info rejects with InvalidData.
lift_row = 2
loop do
  begin
    puts get_lift_info(page, lift_row)
  rescue InvalidData
    break
  end
  lift_row += 1
end