diff options
Diffstat (limited to 'extract.rb')
-rw-r--r-- | extract.rb | 158 |
1 files changed, 158 insertions, 0 deletions
diff --git a/extract.rb b/extract.rb new file mode 100644 index 0000000..10daf66 --- /dev/null +++ b/extract.rb | |||
@@ -0,0 +1,158 @@ | |||
1 | require 'httparty' | ||
2 | require 'nokogiri' | ||
3 | require 'date' | ||
4 | |||
# HTTParty parser subclass that turns a response body into a Nokogiri
# HTML document instead of leaving it as a raw string.
class NokogiriParser < HTTParty::Parser
  # Parses the response body (+body+, supplied by HTTParty::Parser) as HTML.
  # NOTE(review): presumably HTTParty dispatches here for the :html format —
  # confirm against the HTTParty::Parser documentation.
  def html
    Nokogiri::HTML.parse(body)
  end
end
10 | |||
# HTTParty client for the Holiday Valley website. Requests are made relative
# to the base URI and responses are run through NokogiriParser.
class HVPage
  include HTTParty

  base_uri 'http://www.holidayvalley.com'
  parser NokogiriParser
end
16 | |||
# Fetch the snow report once; all XPath lookups in this script run against
# this response object.
page = HVPage.get('/HolidayValley/snowreport.aspx')
18 | |||
19 | |||
# Signals that scraped data did not have the expected shape.
#
# Derives from StandardError rather than Exception so that it behaves like an
# ordinary application error: a bare `rescue` catches it, and it is kept apart
# from interpreter-level exceptions such as Interrupt and SystemExit.
class InvalidData < StandardError
end
22 | |||
# Stateless converters that turn raw text scraped from the snow report into
# Ruby values. Every method is a module-level function taking a single String.
module Transforms
  # strftime pattern shared by parse_date and parse_date_time.
  TIMESTAMP_FORMAT = "%a, %d %b %Y %H:%M:%S %Z".freeze

  # Splits a "low-high" range into its integer parts.
  #
  # @param value [String] e.g. "24-36"
  # @return [Array<Integer>] e.g. [24, 36]
  def self.parse_range(value)
    value.split("-").map(&:to_i)
  end

  # @param value [String]
  # @return [Integer] String#to_i result (0 when the text has no leading number)
  def self.to_int(value)
    value.to_i
  end

  # @param value [String]
  # @return [Boolean] true when the text is "yes", ignoring case and
  #   surrounding whitespace
  def self.is_true(value)
    normalize(value) == "yes"
  end

  # Re-formats a month/day/year date.
  #
  # @param value [String] date in "%m/%d/%Y" form; surrounding whitespace is ignored
  # @return [String] the date rendered with TIMESTAMP_FORMAT
  # @raise [ArgumentError] (from Date.strptime) when the text is not a valid date
  def self.parse_date(value)
    Date.strptime(value.strip, "%m/%d/%Y").strftime(TIMESTAMP_FORMAT)
  end

  # Re-formats a free-form date/time string.
  #
  # @param value [String] anything DateTime.parse accepts
  # @return [String] the timestamp rendered with TIMESTAMP_FORMAT
  def self.parse_date_time(value)
    DateTime.parse(value).strftime(TIMESTAMP_FORMAT)
  end

  # @param value [String]
  # @return [Boolean] true when the text is "open" (case/whitespace-insensitive)
  def self.parse_open(value)
    normalize(value) == "open"
  end

  # @param value [String]
  # @return [Boolean] true when the text is "groomed" (case/whitespace-insensitive)
  def self.parse_groomed(value)
    normalize(value) == "groomed"
  end

  # @param value [String]
  # @return [Boolean] true when the text is "new" (case/whitespace-insensitive)
  def self.parse_snowmaking(value)
    normalize(value) == "new"
  end

  # Maps a difficulty icon URL to a label by looking for a known image name.
  # Matching is case-sensitive and the first matching pattern wins.
  #
  # @param url [String] image source containing the icon file name
  # @return [String, nil] the label, or nil when no known icon name is present
  def self.parse_difficulty(url)
    case url
    when /green\.gif/       then "Easier"
    when /blue\.gif/        then "Intermediate"
    when /black\.gif/       then "Advanced"
    when /doubleBlack\.gif/ then "Expert"
    when /freestyle\.gif/   then "Freestyle"
    end
  end

  # Splits text of the form "Name (123')" into the name and the number that
  # precedes the apostrophe inside the parentheses.
  #
  # @param value [String]
  # @return [Array(String, Integer)] stripped name and the parenthesised number
  # @raise [ArgumentError] when the text does not contain a "(...')" suffix
  def self.parse_lift_name(value)
    match = /([^(]+)\(([^']*)'\)/.match(value)
    # Fail with the offending text instead of a NoMethodError on nil.
    raise ArgumentError, "Unrecognised lift name: #{value.inspect}" if match.nil?

    [match[1].strip, match[2].to_i]
  end

  # Lower-cases the text and removes surrounding whitespace so keyword
  # comparisons tolerate the padding that scraped text often carries.
  def self.normalize(value)
    value.strip.downcase
  end
  private_class_method :normalize
end
75 | |||
# Lookup table for the mountain-report summary fields.
#
# Each entry maps a field name to [xpath, finder, transform]:
#   xpath     - query selecting the candidate text nodes
#   finder    - Symbol (method called on the result set, e.g. :first / :last)
#               or Integer (index into the result set)
#   transform - callable that receives the selected node's text
#
# Frozen so the table cannot be modified by accident at runtime.
MTN_REPORT_XPATH = {
  last_updated: ["//table[1]/tr[1]/td[2]/text()", :last, Transforms.method(:parse_date_time)],
  report_for: ["//table[1]/tr[2]/td[2]/text()", 1, Transforms.method(:parse_date)],
  snowfall_24hr: ["//table[1]/tr[3]/td[2]/text()", :first, Transforms.method(:to_int)],
  snowfall_48hr: ["//table[1]/tr[4]/td[2]/text()", :first, Transforms.method(:to_int)],
  snowfall_7day: ["//table[1]/tr[5]/td[2]/text()", :first, Transforms.method(:to_int)],
  snowfall_season: ["//table[1]/tr[6]/td[2]/text()", :first, Transforms.method(:to_int)],
  base_depth: ["//table[1]/tr[7]/td[2]/text()", :first, Transforms.method(:parse_range)],
  snowmaking_current: ["//table[1]/tr[8]/td[2]/text()", :first, Transforms.method(:is_true)],
  snowmaking_24hours: ["//table[1]/tr[9]/td[2]/text()", :first, Transforms.method(:is_true)],
  # String.new copies the text through unchanged.
  primary_surface: ["//table[1]/tr[10]/td[2]/text()", :first, String.method(:new)],
  secondary_surface: ["//table[1]/tr[11]/td[2]/text()", :first, String.method(:new)],
}.freeze
89 | |||
# Selects one node from the page and converts its text.
#
# @param page      object responding to #xpath
# @param xpath     [String] query selecting the candidate nodes
# @param finder    [Symbol, Integer] a Symbol is called as a method on the
#                  result set (e.g. :first, :last); any other value is used
#                  as an index into it
# @param transform [#call] receives the selected node's text
# @return whatever +transform+ returns
# @raise [InvalidData] when the query/finder combination selects no node
def get_transformed_value(page, xpath, finder, transform)
  nodes = page.xpath(xpath)
  node = finder.is_a?(Symbol) ? nodes.public_send(finder) : nodes[finder]
  # A missing node would otherwise surface as NoMethodError on nil.
  raise InvalidData, "No node found for #{xpath} (finder: #{finder.inspect})" if node.nil?

  transform.call(node.text)
end
95 | |||
# Builds the status hash for one row of the second table on the page.
#
# @param page object responding to #xpath
# @param row  row number interpolated into the tr[...] XPath predicate
# @return [Hash] :trail_name, :difficulty, :open_day, :open_night, :groomed,
#   :snow_making
# @raise [InvalidData] when the row does not have exactly six child elements
def get_trail_info(page, row)
  row_path = "//table[2]/tr[#{row}]"
  cells = page.xpath("#{row_path}/child::*").map(&:text)
  icon_src = page.xpath("#{row_path}/td/img/@src").to_s

  raise InvalidData, "Invalid data in row" unless cells.size == 6

  # The second cell's text is unused; difficulty comes from the icon URL instead.
  name, _icon_cell, day, night, grooming, snowmaking = cells

  {
    trail_name: name,
    difficulty: Transforms.parse_difficulty(icon_src),
    open_day: Transforms.parse_open(day),
    open_night: Transforms.parse_open(night),
    groomed: Transforms.parse_groomed(grooming),
    snow_making: Transforms.parse_snowmaking(snowmaking),
  }
end
113 | |||
# Builds the status hash for one row of the third table on the page.
#
# @param page object responding to #xpath
# @param row  row number interpolated into the tr[...] XPath predicate
# @return [Hash] :lift_name, :vertical, :open_day, :open_night, :notes
# @raise [InvalidData] when the row does not have exactly four child elements
def get_lift_info(page, row)
  cells = page.xpath("//table[3]/tr[#{row}]/child::*").map(&:text)

  raise InvalidData, "Invalid data in row" unless cells.size == 4

  label, day, night, notes = cells
  # The first cell carries both the lift name and a parenthesised number.
  name, vertical = Transforms.parse_lift_name(label)

  {
    lift_name: name,
    vertical: vertical,
    open_day: Transforms.parse_open(day),
    open_night: Transforms.parse_open(night),
    notes: notes,
  }
end
131 | |||
# Print trail rows starting at row 3 and stop at the first row that
# get_trail_info rejects with InvalidData.
trail_row = 3
more_trails = true
while more_trails
  begin
    puts get_trail_info(page, trail_row)
    trail_row += 1
  rescue InvalidData
    more_trails = false
  end
end
141 | |||
# Resolve every summary field described in MTN_REPORT_XPATH and print the
# resulting hash.
mountain_report = MTN_REPORT_XPATH.each_with_object({}) do |(field, spec), values|
  values[field] = get_transformed_value(page, *spec)
end

puts mountain_report
149 | |||
# Print lift rows starting at row 2 and stop at the first row that
# get_lift_info rejects with InvalidData.
lift_row = 2
more_lifts = true
while more_lifts
  begin
    puts get_lift_info(page, lift_row)
    lift_row += 1
  rescue InvalidData
    more_lifts = false
  end
end