From fe1d5707cf5a7db7d41ac6ba129155adcb230d95 Mon Sep 17 00:00:00 2001
From: Mike Crute
Date: Tue, 28 Jul 2015 21:17:56 -0700
Subject: Initial import

---
Note: line breaks were restored from a whitespace-collapsed export of this
patch. Line boundaries follow the "+" markers and match the hunk counts
below (49 / 5 / 31 / 158 added lines). Indentation inside the added files
is conventional and could not be verified against the original blobs.

 .rvmrc       |  49 ++++++++++++++++++
 Gemfile      |   5 ++
 Gemfile.lock |  31 ++++++++++++
 extract.rb   | 158 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 243 insertions(+)
 create mode 100644 .rvmrc
 create mode 100644 Gemfile
 create mode 100644 Gemfile.lock
 create mode 100644 extract.rb

diff --git a/.rvmrc b/.rvmrc
new file mode 100644
index 0000000..f879d68
--- /dev/null
+++ b/.rvmrc
@@ -0,0 +1,49 @@
+#!/usr/bin/env bash
+
+# This is an RVM Project .rvmrc file, used to automatically load the ruby
+# development environment upon cd'ing into the directory
+
+# First we specify our desired [@], the @gemset name is optional.
+environment_id="ruby-1.9.2-p136@hv_snowreport"
+
+#
+# First we attempt to load the desired environment directly from the environment
+# file. This is very fast and efficicent compared to running through the entire
+# CLI and selector. If you want feedback on which environment was used then
+# insert the word 'use' after --create as this triggers verbose mode.
+#
+if [[ -d "${rvm_path:-$HOME/.rvm}/environments" \
+  && -s "${rvm_path:-$HOME/.rvm}/environments/$environment_id" ]] ; then
+  \. "${rvm_path:-$HOME/.rvm}/environments/$environment_id"
+
+  [[ -s "$rvm_path/hooks/after_use" ]] && . "$rvm_path/hooks/after_use"
+else
+  # If the environment file has not yet been created, use the RVM CLI to select.
+  rvm --create "$environment_id"
+fi
+
+#
+# If you use an RVM gemset file to install a list of gems (*.gems), you can have
+# it be automatically loaded. Uncomment the following and adjust the filename if
+# necessary.
+#
+# filename=".gems"
+# if [[ -s "$filename" ]] ; then
+#   rvm gemset import "$filename" | grep -v already | grep -v listed | grep -v complete | sed '/^$/d'
+# fi
+
+#
+# If you use bundler and would like to run bundle each time you enter the
+# directory, you can uncomment the following code.
+#
+# # Ensure that Bundler is installed. Install it if it is not.
+# if ! command -v bundle >/dev/null; then
+#   printf "The rubygem 'bundler' is not installed. Installing it now.\n"
+#   gem install bundler
+# fi
+#
+# # Bundle while reducing excess noise.
+# printf "Bundling your gems. This may take a few minutes on a fresh clone.\n"
+# bundle | grep -v '^Using ' | grep -v ' is complete' | sed '/^$/d'
+#
+
diff --git a/Gemfile b/Gemfile
new file mode 100644
index 0000000..31b064d
--- /dev/null
+++ b/Gemfile
@@ -0,0 +1,5 @@
+source :rubygems
+
+gem "httparty"
+gem "nokogiri"
+gem 'ruby-debug19', :require => 'ruby-debug'
diff --git a/Gemfile.lock b/Gemfile.lock
new file mode 100644
index 0000000..1e04b26
--- /dev/null
+++ b/Gemfile.lock
@@ -0,0 +1,31 @@
+GEM
+  remote: http://rubygems.org/
+  specs:
+    archive-tar-minitar (0.5.2)
+    columnize (0.3.6)
+    httparty (0.8.1)
+      multi_json
+      multi_xml
+    linecache19 (0.5.12)
+      ruby_core_source (>= 0.1.4)
+    multi_json (1.0.4)
+    multi_xml (0.4.1)
+    nokogiri (1.5.0)
+    ruby-debug-base19 (0.11.25)
+      columnize (>= 0.3.1)
+      linecache19 (>= 0.5.11)
+      ruby_core_source (>= 0.1.4)
+    ruby-debug19 (0.11.6)
+      columnize (>= 0.3.1)
+      linecache19 (>= 0.5.11)
+      ruby-debug-base19 (>= 0.11.19)
+    ruby_core_source (0.1.5)
+      archive-tar-minitar (>= 0.5.2)
+
+PLATFORMS
+  ruby
+
+DEPENDENCIES
+  httparty
+  nokogiri
+  ruby-debug19
diff --git a/extract.rb b/extract.rb
new file mode 100644
index 0000000..10daf66
--- /dev/null
+++ b/extract.rb
@@ -0,0 +1,158 @@
+require 'httparty'
+require 'nokogiri'
+require 'date'
+
+class NokogiriParser < HTTParty::Parser
+  def html
+    Nokogiri::HTML(body)
+  end
+end
+
+class HVPage
+  include HTTParty
+  parser NokogiriParser
+  base_uri "http://www.holidayvalley.com"
+end
+
+page = HVPage.get("/HolidayValley/snowreport.aspx")
+
+
+class InvalidData < Exception
+end
+
+module Transforms
+  def self.parse_range(value)
+    value.split("-").map(&:to_i)
+  end
+
+  def self.to_int(value)
+    value.to_i
+  end
+
+  def self.is_true(value)
+    value.downcase == "yes"
+  end
+
+  def self.parse_date(value)
+    Date.strptime(value, "%m/%d/%Y").strftime("%a, %d %b %Y %H:%M:%S %Z")
+  end
+
+  def self.parse_date_time(value)
+    DateTime.parse(value).strftime("%a, %d %b %Y %H:%M:%S %Z")
+  end
+
+  def self.parse_open(value)
+    value.downcase == "open"
+  end
+
+  def self.parse_groomed(value)
+    value.downcase == "groomed"
+  end
+
+  def self.parse_snowmaking(value)
+    value.downcase == "new"
+  end
+
+  def self.parse_difficulty(url)
+    if /green\.gif/ =~ url
+      "Easier"
+    elsif /blue\.gif/ =~ url
+      "Intermediate"
+    elsif /black\.gif/ =~ url
+      "Advanced"
+    elsif /doubleBlack\.gif/ =~ url
+      "Expert"
+    elsif /freestyle\.gif/ =~ url
+      "Freestyle"
+    end
+  end
+
+  def self.parse_lift_name(value)
+    data = /([^(]+)\(([^']*)'\)/.match(value)
+    [data[1].strip, data[2].to_i]
+  end
+end
+
+MTN_REPORT_XPATH = {
+  last_updated: ["//table[1]/tr[1]/td[2]/text()", :last, Transforms.method(:parse_date_time)],
+  report_for: ["//table[1]/tr[2]/td[2]/text()", 1, Transforms.method(:parse_date)],
+  snowfall_24hr: ["//table[1]/tr[3]/td[2]/text()", :first, Transforms.method(:to_int)],
+  snowfall_48hr: ["//table[1]/tr[4]/td[2]/text()", :first, Transforms.method(:to_int)],
+  snowfall_7day: ["//table[1]/tr[5]/td[2]/text()", :first, Transforms.method(:to_int)],
+  snowfall_season: ["//table[1]/tr[6]/td[2]/text()", :first, Transforms.method(:to_int)],
+  base_depth: ["//table[1]/tr[7]/td[2]/text()", :first, Transforms.method(:parse_range)],
+  snowmaking_current: ["//table[1]/tr[8]/td[2]/text()", :first, Transforms.method(:is_true)],
+  snowmaking_24hours: ["//table[1]/tr[9]/td[2]/text()", :first, Transforms.method(:is_true)],
+  primary_surface: ["//table[1]/tr[10]/td[2]/text()", :first, String.method(:new)],
+  secondary_surface: ["//table[1]/tr[11]/td[2]/text()", :first, String.method(:new)],
+}
+
+def get_transformed_value(page, xpath, finder, transform)
+  data = page.xpath(xpath)
+  data = finder.is_a?(Symbol) ? data.send(finder) : data[finder]
+  transform.call(data.text)
+end
+
+def get_trail_info(page, row)
+  data = page.xpath("//table[2]/tr[#{row}]/child::*").map(&:text)
+  difficulty_info = page.xpath("//table[2]/tr[#{row}]/td/img/@src").to_s
+
+  if data.size != 6
+    raise InvalidData.new("Invalid data in row")
+  end
+
+  {
+    trail_name: data[0],
+    difficulty: Transforms.parse_difficulty(difficulty_info),
+    open_day: Transforms.parse_open(data[2]),
+    open_night: Transforms.parse_open(data[3]),
+    groomed: Transforms.parse_groomed(data[4]),
+    snow_making: Transforms.parse_snowmaking(data[5]),
+  }
+end
+
+def get_lift_info(page, row)
+  data = page.xpath("//table[3]/tr[#{row}]/child::*").map(&:text)
+
+  if data.size != 4
+    raise InvalidData.new("Invalid data in row")
+  end
+
+  lift_name, vertical = Transforms.parse_lift_name(data[0])
+
+  {
+    lift_name: lift_name,
+    vertical: vertical,
+    open_day: Transforms.parse_open(data[1]),
+    open_night: Transforms.parse_open(data[2]),
+    notes: data[3],
+  }
+end
+
+i = 3
+while true
+  begin
+    puts get_trail_info(page, i)
+    i += 1
+  rescue InvalidData
+    break
+  end
+end
+
+k = {}
+MTN_REPORT_XPATH.each do |key, value|
+  data = get_transformed_value(page, *value)
+  k[key] = data
+end
+
+puts k
+
+i = 2
+while true
+  begin
+    puts get_lift_info(page, i)
+    i += 1
+  rescue InvalidData
+    break
+  end
+end
-- 
cgit v1.2.3