Upload
rob
View
2.434
Download
2
Embed Size (px)
DESCRIPTION
Morph and Pottery rubygem utilities for screen scrapers.
Citation preview
code == data
data == code
OpenStruct
Photo: Salt Fired http://www.flickr.com/photos/saltfired/201994906/
require 'ostruct'
o = OpenStruct.new
o.name = 'el rug'
o.name
=> "el rug"
o.inspect
=> "#<OpenStruct name=\"el rug\">"
# not very classy
o.class
=> OpenStruct
# A Fund built on OpenStruct: attributes can be assigned freely (as with any
# OpenStruct) while the class still gives custom logic a home.
class Fund < OpenStruct
  # Placeholder for domain logic; intentionally empty, so it returns nil.
  def your_logic
  end
end
/** A Fund modelled as a raw {@link HashMap} subclass; attributes live as map entries. */
public class Fund extends HashMap {
}
/** A Fund modelled as a raw {@link HashMap} subclass; attributes live as map entries. */
public class Fund extends HashMap {
    /* bad code smell */
}
/** A Fund modelled as a {@code HashMap<String, Object>} subclass: attribute name to value. */
public class Fund extends HashMap<String, Object> {
}
/** A Fund modelled as a {@code HashMap<String, Object>} subclass: attribute name to value. */
public class Fund extends HashMap<String, Object> {
    /* this stinks! */
}
Morph
Photo: Salt Fired http://www.flickr.com/photos/saltfired/201998836/
gem install morph
require 'morph'
require 'hpricot'
require 'open-uri'
# Scrapes a GitHub profile page and turns each labelled field into an
# attribute via Morph.
class Hubbit
  include Morph

  # Fetches http://github.com/<name> and, for every 'label' element on the
  # page, passes the label text and the stripped text of the element that
  # follows it to +morph+ (supplied by the Morph mixin).
  #
  # name - the GitHub user name to look up.
  def initialize(name)
    doc = Hpricot open("http://github.com/#{name}")
    (doc/'label').each do |l|
      label = l.inner_text
      value = l.next_sibling.inner_text.strip
      morph(label, value)
    end
  end
end
# Scrapes a GitHub profile page and turns each labelled field into an
# attribute via Morph.
class Hubbit
  include Morph

  # Fetches http://github.com/<name> and, for every 'label' element on the
  # page, passes the label text and the stripped text of the element that
  # follows it to +morph+ (supplied by the Morph mixin).
  #
  # name - the GitHub user name to look up.
  #
  # Raises RuntimeError ("Couldn't find hubbit with name: <name>") when
  # anything in the fetch/parse/morph sequence raises a StandardError.
  def initialize(name)
    begin
      doc = Hpricot open("http://github.com/#{name}")
      (doc/'label').each do |node|
        label = node.inner_text
        value = node.next_sibling.inner_text.strip
        morph(label, value)
      end
    rescue
      # Any failure is reported uniformly as "not found"; the original error
      # is deliberately replaced by this message.
      raise "Couldn't find hubbit with name: #{name}"
    end
  end
end
Hubbit.morph_methods
=> []
why = Hubbit.new 'why'
=> #<Hubbit @name="why the lucky
stiff", @email="why@why...">
Hubbit.morph_methods
=> ["email", "email=", "name", "name="]
why.name
=> "why the lucky stiff"
why.年龄 = 21
why.年龄
=> 21
why.company
NoMethodError: undefined method
'company'
# maybe should have
why.company?
# but that's not there yet
dhh = Hubbit.new 'dhh'
Hubbit.morph_methods
=> ["blog", "blog=", "company", "company=",
"email", "email=", "location", "location=",
"name", "name=", "年龄", "年龄="]
dhh.company
=> "37signals"
why.company
=> nil
implementation
# Intercepts calls to undefined methods. Attribute writers (method names
# ending in '=') are routed to morph_method_missing; every other name falls
# through to the default behaviour via +super+.
#
# sym  - the Symbol name of the missing method.
# args - the arguments the caller passed.
def method_missing(sym, *args)
  is_writer = sym.to_s =~ /=$/
  is_writer ? morph_method_missing(sym, *args) : super
end
# Handles a missing attribute writer.
#
# symbol - the writer's name (e.g. :name=); a trailing '=' is stripped to
#          obtain the attribute name.
# args   - the arguments given to the writer.
#
# With a block: yields the receiver's class and the attribute name, and
# defines nothing itself.
# Without a block: defines an attr_accessor for the attribute on the
# receiver's class, then re-sends the original call so the value is stored.
def morph_method_missing(symbol, *args)
  attribute = symbol.to_s.chomp '='
  # ...
  if block_given?
    yield self.class, attribute
  else
    # NOTE(review): the attribute name is interpolated into evaluated code;
    # make sure callers only pass names that are safe Ruby identifiers.
    self.class.class_eval "attr_accessor :#{attribute}"
    send(symbol, *args)
  end
  # ...
end
Soup
Photo: Chrissy Wainwright http://www.flickr.com/photos/wainwright/380578681/
gem install soup
require 'soup'
Soup.prepare
s = Snip.new
s.name = 'el rug'
s.inspect
=> "<Snip id:unset name:el rug>"
s.save
=> "<Snip id:1 name:el rug>"
s = Snip['el rug']
=> "<Snip id:1 name:el rug>"
# has no class
s.class
=> nil
BlankSlate
# A near-blank base class: every public/protected instance method is
# undefined except those whose names start with '__', 'instance_eval' or
# 'respond_to?'.
class EmptyClass
  instance_methods.each do |method_name|
    next if method_name =~ /^(__|instance_eval|respond_to\?)/

    undef_method(method_name)
  end
end
# Snip inherits the stripped-down method set of EmptyClass and adds nothing.
class Snip < EmptyClass
end
Pottery
Photo: zhaoshouren http://www.flickr.com/photos/ajanhelendam/2326369128/
gem install pottery
# Finds the table rows that hold price data in +doc+.
#
# Looks for rows under a 'Bid(GBX)' cell first; if none are found, falls
# back to rows under a 'Nav(GBX)' cell. Records in @bid_offer whether the
# 'Bid(GBX)' lookup found anything.
#
# doc - the parsed document handed to rows_starting.
#
# Returns whatever rows_starting returned for the label that was used.
def get_price_rows(doc)
  rows = rows_starting 'Bid(GBX)', doc
  @bid_offer = !rows.empty?
  rows = rows_starting 'Nav(GBX)', doc unless @bid_offer
  rows
end
# Searches +doc+ (via its `/` operator) for the rows of the table that
# contains a cell whose text equals +label+.
#
# label - the exact cell text to look for.
# doc   - an object that responds to `/` with a path string.
#
# Returns whatever `doc / path` returns.
def rows_starting(label, doc)
  path = "table/tr/td/[text()='#{label}']/../../../tr"
  doc / path
end
# Walks the price rows of +doc+ and yields the non-blank cell texts two at a
# time, as (label, value).
#
# For every row from get_price_rows, the text of each 'td' is stripped,
# blank results are dropped, and the remainder is yielded in groups of two.
#
# doc - the parsed document handed to get_price_rows.
def each_entry(doc)
  get_price_rows(doc).each do |row|
    texts = (row / 'td').collect { |cell| cell.inner_text }
    texts = texts.collect { |text| text.strip }
    texts.delete_if { |text| text.blank? }
    texts.in_groups_of(2) { |entry| yield entry[0], entry[1] }
  end
end
# Excerpt from Fund#initialize: load the fund page, morph each label/value
# pair into an attribute, then record bookkeeping attributes.
doc = open_doc url

each_entry doc do |label, value|
  morph(label, value)
end

# UTC time of day as HH:MM:SS (the same text the original extracted from
# Time#to_s with a regexp).
self.time = Time.now.utc.strftime('%H:%M:%S')
self.name = doc.at('.FundNameHeader').inner_text
self.url = url
self.date = Date.today.to_s
self.id_name = "#{url}##{date}" # "<url>#<date>"
require 'pottery'
# A fund whose attributes are scraped from its funds.ft.com page and morphed
# into accessors via Pottery.
class Fund
  include Pottery

  # fund - optional path segment appended to http://funds.ft.com/funds/.
  #        When nil, nothing is fetched and no attributes are set.
  #
  # When given, the page is opened, every label/value pair found in the
  # price rows is morphed into an attribute, and time, name, url, date and
  # id_name are assigned.
  def initialize(fund = nil)
    return unless fund

    url = "http://funds.ft.com/funds/#{fund}"
    doc = open_doc url

    each_entry doc do |label, value|
      morph(label, value)
    end

    # UTC time of day as HH:MM:SS (the same text the original extracted
    # from Time#to_s with a regexp).
    self.time = Time.now.utc.strftime('%H:%M:%S')
    self.name = doc.at('.FundNameHeader').inner_text
    self.url = url
    self.date = Date.today.to_s
    self.id_name = "#{url}##{date}" # "<url>#<date>"
  end

  # Returns bid_gbx when the page had 'Bid(GBX)' rows, otherwise nav_gbx.
  def bid_price
    @bid_offer ? bid_gbx : nav_gbx
  end

  # Returns offer_gbx when the page had 'Bid(GBX)' rows, otherwise ''.
  def offer_price
    @bid_offer ? offer_gbx : ''
  end

  private

  # Yields the non-blank, stripped 'td' texts of each price row two at a
  # time, as (label, value).
  def each_entry(doc)
    get_price_rows(doc).each do |row|
      cells = (row/'td').collect(&:inner_text).collect(&:strip).delete_if(&:blank?)
      cells.in_groups_of(2) do |entry|
        yield entry[0], entry[1]
      end
    end
  end

  # Returns the rows under 'Bid(GBX)', or under 'Nav(GBX)' when there are
  # none; @bid_offer records which case applied.
  def get_price_rows(doc)
    rows = rows_starting 'Bid(GBX)', doc
    @bid_offer = rows.size > 0
    rows = rows_starting 'Nav(GBX)', doc unless @bid_offer
    rows
  end

  # Returns the rows of the table containing a cell whose text is +label+.
  def rows_starting(label, doc)
    (doc/"table/tr/td/[text()='#{label}']/../../../tr")
  end
end # of Fund
fund = Fund.new 'rufferllp/ruffer/RZBST'
Fund.morph_methods
["_52w_high", "_52w_high=", "_52w_low", "_52w_low=",
"change", "change=","date", "date=",
"gross_yield", "gross_yield=", "id_name", "id_name=",
"listed_yield", "listed_yield=", "name", "name=",
"nav_gbx", "nav_gbx=", "net_yield", "net_yield=",
"percentage_change", "percentage_change=", "time",
"time=", "url", "url="]
fund.save
Fund.restore 'rufferllp/ruffer/RZBST#2008-04-14'
#<Fund:0x1857414 @percentage_change="+0.96",
@gross_yield="-", @id_name="rufferllp/ruffer/RZBST#2008-04-14", @net_yield="-", @bid_offer=false, @date="2008-04-14", @_52w_low="142.38", @listed_yield="-", @time="23:00:14",
@name="Ruffer CF Baker Steel Gold O Acc NAV", @nav_gbx="183.90",
@url="rufferllp/ruffer/RZBST", @change="+1.74", @_52w_high="209.88">
Future features?
identify data types
e.g. integer, date, string
generate Rails generator line
e.g. script/generate model x:string
y:integer
generate doodle definition!
data == code
http://code.whytheluckystiff.net/hpricot
http://github.com/lazyatom/soup
http://github.com/robmckinnon/morph
http://github.com/robmckinnon/pottery