Web からテキストを取得してハッシュに変換する
Web からデータを取得して、あとは Mix-in で取り込んだ Filter 機能でデータをごりごり加工していく。
#!/usr/bin/ruby
# frozen_string_literal: true

require 'uri'
require 'net/http'

# Historical switch that selected the "1.2" net/http API on old Rubies.
# NOTE(review): presumably a no-op on current Rubies; kept so behavior on
# older interpreters is unchanged.
Net::HTTP.version_1_2

module Rire
  # Declarative text-to-records mapper.
  #
  # A Mapping holds one extraction rule (a regexp plus the attribute names
  # assigned to its capture groups) and an optional base URL. Text obtained
  # from a block or over HTTP is scanned with the regexp and every match is
  # turned into a Hash stored in the internal entries list.
  class Mapping
    # Builds a Mapping, evaluates the given block in the context of the new
    # instance (so +base_url+ / +extract_capture+ can be called DSL-style),
    # and then mixes Rire::Filter into that single instance.
    #
    # @return [Rire::Mapping] the configured instance, extended with Filter
    def self.define(&block)
      mapping = new
      mapping.instance_eval(&block) unless block.nil?
      # Filter is mixed in only after the DSL block ran, so Filter methods
      # are not visible inside the block.
      mapping.extend(Rire::Filter)
      mapping
    end

    def initialize
      @entries = []
      @options = {}
      @base_url = nil
    end

    # Registers the extraction rule.
    #
    # @param regexp [Regexp] pattern applied repeatedly to the fetched text
    # @param attr_names [Array] names for capture groups 1..n, in order
    # @param block optional hook evaluated against every extracted item
    #   (the item is both +self+ and the block argument)
    # @return the given block (nil when none was passed)
    def extract_capture(regexp, attr_names, &block)
      rule = (@options[:extract_capture] ||= {})
      rule[:regexp] = regexp
      rule[:attr_names] = attr_names
      rule[:block] = block
    end

    # Sets the base URL, or reads it back when called without an argument.
    #
    # @param url [String, nil] URL to parse and store
    # @return [URI, nil] the base URL in effect after the call
    def base_url(url = nil)
      # A nil/empty argument leaves the setting alone and acts as a reader.
      return @base_url if url.nil? || url.empty?

      @base_url = URI.parse(url)
    end

    # Obtains text and extracts entries from it.
    #
    # Source priority: the block's return value if a block is given,
    # otherwise an HTTP GET of +url+ (resolved against the base URL when one
    # is configured), otherwise an HTTP GET of the base URL itself.
    #
    # @param url [String, nil] absolute URL, or a reference relative to the
    #   configured base URL
    # @return [self]
    def fetch(url = nil, &block)
      # TODO: raise an exception instead?
      # Nothing to read from at all: leave the entries untouched.
      return self if block.nil? && url.nil? && @base_url.nil?

      text = block.nil? ? _http_get(_resolve_uri(url)) : block.call
      _extract(text)
    end

    # Discards all extracted entries.
    #
    # @return [self]
    def clear
      @entries = []
      self
    end

    # Replaces the entries with one Hash per match of the configured regexp
    # in +t+. Each Hash maps attr_names[i] to capture group i + 1 (nil when
    # the regexp has fewer groups than names).
    #
    # @param t [String, nil] text to scan; nil is treated as empty text
    # @return [self]
    # @raise [ArgumentError] when extract_capture has not been called
    def _extract(t)
      rule = @options[:extract_capture]
      raise ArgumentError, 'extract_capture must be configured before extracting' if rule.nil?

      regexp = rule[:regexp]
      attr_names = rule[:attr_names]
      after_block = rule[:block]

      @entries = []
      # scan (rather than gsub) visits every match without building a
      # throwaway replacement string for each one.
      t.to_s.scan(regexp) do
        match = Regexp.last_match
        item = {}
        # TODO: validate attr_names against Regexp.last_match.length
        attr_names.each_with_index { |name, index| item[name] = match[index + 1] }
        # The per-item hook is optional.
        item.instance_eval(&after_block) unless after_block.nil?
        @entries << item
      end
      self
    end

    private

    # Chooses the URI to request: the base URL when no url is given, the
    # parsed url when no base URL is set, otherwise url resolved against
    # the base URL (an absolute url therefore wins over the base URL).
    def _resolve_uri(url)
      return @base_url if url.nil?
      return URI.parse(url) if @base_url.nil?

      @base_url.merge(url)
    end

    # Performs a GET request and returns the response body.
    def _http_get(uri)
      Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == 'https') do |http|
        request = Net::HTTP::Get.new(uri.request_uri)
        # Not implemented yet:
        # req["user-agent"] = @user_agent unless @user_agent.empty?
        # req.basic_auth(@username, @password) if @username && @password
        http.request(request).body
      end
    end
  end

  # Chainable post-processing helpers, mixed into Mapping instances by
  # Mapping.define. Every method operates on the host object's @entries.
  module Filter
    # @return the current entries collection (an Array as built by
    #   Mapping#_extract, despite the method name)
    def to_hash
      @entries
    end

    # Replaces the entries with the result of mapping them through the
    # block; without a block the entries are left as they are.
    #
    # @return [self]
    def apply(&block)
      @entries = @entries.map(&block) unless block.nil?
      self
    end

    # Drops nil entries (e.g. those an apply block mapped to nil).
    #
    # @return [self]
    def compact
      @entries = @entries.compact
      self
    end

    # Dumps the entries to standard output with Kernel#p.
    #
    # @return [self]
    def print
      p @entries
      self
    end
  end
end

if $PROGRAM_NAME == __FILE__
  require 'pp'
  require 'open-uri'

  s = <<EOD
<feed>
  <entry>
    <name>user01</name>
    <comment>comment01</comment>
  </entry>
  <entry>
    <name>user03</name>
    <comment>comment03</comment>
  </entry>
  <entry>
    <name>user05</name>
    <comment>comment05</comment>
  </entry>
</feed>
EOD

  demo_url = 'http://localhost:8080/hoge.txt'

  f = Rire::Mapping.define do
    base_url demo_url
    extract_capture %r{<name>(.+?)</name>\s+<comment>(.+?)</comment>}m, [:name, :comment] do |e|
      e[:comment] = "hoge" if e[:name].eql?('user05')
    end
  end

  # Offline example: extract straight from the inline sample.
  f._extract(s).print.clear

  # The remaining examples need a server at demo_url; report and continue
  # instead of aborting when it is not reachable.
  network_examples = [
    -> { f.fetch { URI.open(demo_url, &:read) } },
    -> { f.fetch(demo_url) },
    -> { f.fetch }
  ]
  network_examples.each do |example|
    begin
      example.call.print
    rescue StandardError => e
      warn "fetch example skipped: #{e.class}: #{e.message}"
    end
  end
end