Web からテキストを取得してハッシュに変換する

Web からデータを取得して、あとは Mix-in で取り込んだ Filter 機能でデータをごりごり加工していく。

#!/usr/bin/ruby

require 'uri'
require 'net/http'

# Select the net/http 1.2 API behaviour.
# NOTE(review): per the net/http documentation this is already the default on
# Ruby 1.8 and later, so this call is presumably kept only for very old
# interpreters -- confirm before removing.
Net::HTTP.version_1_2

module Rire
  # Declarative text-to-hash mapping.
  #
  # A Mapping holds one extraction rule (a regexp plus the attribute names for
  # its capture groups) and an optional base URL. Text supplied by a block or
  # downloaded over HTTP is scanned with the rule, and every match becomes one
  # Hash in the entry list (@entries).
  class Mapping
    # Builds a Mapping, evaluates the optional block with the new instance as
    # +self+ (so +base_url+ and +extract_capture+ can be called bare), then
    # mixes Rire::Filter into that single instance.
    #
    # Returns the configured Mapping.
    def self.define(&block)
      m = new
      m.instance_eval(&block) if block
      m.extend(Rire::Filter)
      m
    end

    def initialize
      @entries = []
      @options = {}
      @base_url = nil
    end

    # Registers the extraction rule.
    #
    # regexp     - Regexp whose capture groups hold the values of interest.
    # attr_names - Array of keys; the i-th key receives the i-th capture group.
    # block      - optional post-processor, run once per extracted Hash via
    #              instance_eval (the Hash is both +self+ and the block
    #              argument).
    def extract_capture(regexp, attr_names, &block)
      rule = (@options[:extract_capture] ||= {})

      rule[:regexp]     = regexp
      rule[:attr_names] = attr_names
      rule[:block]      = block
    end

    # Sets or reads the base URL.
    #
    # With a non-empty String the value is parsed and stored; with no argument
    # (or nil / an empty String) the currently stored URI is returned, or nil
    # when none has been set.
    def base_url(url = nil)
      return @base_url if url.nil? || url.empty?

      @base_url = URI.parse(url)
    end

    # Obtains text and runs the extraction rule over it.
    #
    # The source of the text is chosen in this order:
    # 1. the block, when given (its return value is the text);
    # 2. +url+, resolved against the base URL when one is set;
    # 3. the base URL itself.
    #
    # When none of the three is available there is nothing to fetch and the
    # receiver is returned with its entries untouched.
    #
    # Returns self so that Filter calls can be chained.
    def fetch(url = nil, &block)
      return _extract(block.call) if block

      uri = _resolve_uri(url)
      return self if uri.nil?

      _extract(_http_get(uri))
    end

    # Drops all extracted entries. Returns self.
    def clear
      @entries = []
      self
    end

    # Replaces the entry list with one Hash per match of the extraction rule
    # in +t+ (nil is treated as empty text).
    #
    # Raises RuntimeError when no rule has been registered with
    # +extract_capture+.
    #
    # Returns self.
    def _extract(t)
      rule = @options[:extract_capture]
      raise 'no extraction rule defined; call extract_capture first' if rule.nil?

      regexp      = rule[:regexp]
      attr_names  = rule[:attr_names]
      after_block = rule[:block]

      @entries = []
      # scan (rather than gsub) walks the matches without building a
      # replacement string that nobody uses.
      t.to_s.scan(regexp) do
        # Grab the MatchData first so the post-processing block cannot
        # disturb it by running its own regexp matches.
        match = Regexp.last_match
        item = {}

        # TODO: validate attr_names against the number of capture groups
        # (match.length); a missing group currently yields nil.
        attr_names.each_with_index do |name, i|
          item[name] = match[i + 1]
        end

        # The post-processing block is optional.
        item.instance_eval(&after_block) if after_block

        @entries << item
      end

      self
    end

    private

    # Picks the URI to download: +url+ joined onto the base URL when both are
    # present (an absolute +url+ replaces the base), otherwise whichever one
    # exists. Returns nil when neither is available.
    def _resolve_uri(url)
      return @base_url if url.nil? || url.empty?
      return URI.parse(url) if @base_url.nil?

      @base_url.merge(url)
    end

    # Performs a GET request and returns the response body. TLS is enabled
    # for https URIs.
    #
    # TODO: optional User-Agent header and basic authentication.
    def _http_get(uri)
      Net::HTTP.start(uri.host, uri.port, :use_ssl => (uri.scheme == 'https')) do |http|
        http.request(Net::HTTP::Get.new(uri.request_uri)).body
      end
    end
  end

  # Chainable post-processing helpers operating on the @entries instance
  # variable of the object they are mixed into (Mapping.define extends each
  # new Mapping with this module). Every method except +to_hash+ returns self.
  module Filter
    # Returns the current entry list exactly as stored in @entries.
    def to_hash
      @entries
    end

    # Replaces every entry with the block's result for it. Without a block
    # the entries are left as they are.
    def apply(&block)
      return self if block.nil?

      @entries = @entries.map(&block)
      self
    end

    # Removes nil entries.
    def compact
      @entries = @entries.reject { |entry| entry.nil? }
      self
    end

    # Dumps the entry list to standard output in inspect form.
    def print
      tap { p(@entries) }
    end
  end
end


# Demo, executed only when this file is run directly. The HTTP examples
# expect a server answering on http://localhost:8080/hoge.txt.
if $0 == __FILE__
  s = <<EOD
<feed>
  <entry>
    <name>user01</name>
    <comment>comment01</comment>
  </entry>
  <entry>
    <name>user03</name>
    <comment>comment03</comment>
  </entry>
  <entry>
    <name>user05</name>
    <comment>comment05</comment>
  </entry>
</feed>
EOD

  f = Rire::Mapping.define do
    base_url 'http://localhost:8080/hoge.txt'

    # One entry per <name>/<comment> pair; user05's comment is overwritten.
    extract_capture %r{<name>(.+?)</name>\s+<comment>(.+?)</comment>}m, [:name, :comment] do |e|
      e[:comment] = "hoge"  if e[:name].eql?('user05')
    end
  end

  require 'pp'
  require 'open-uri'

  # Parse the inline sample, print the entries, then reset.
  f._extract(s).print.clear

  # Text supplied by a block. URI.open is used because open-uri no longer
  # hooks Kernel#open for URLs on Ruby 3.0+; the block form also closes the
  # handle once the body has been read.
  f.fetch { URI.open('http://localhost:8080/hoge.txt') { |io| io.read } }.print

  # HTTP fetch, passing a URL.
  f.fetch('http://localhost:8080/hoge.txt').print

  # HTTP fetch relying on the configured base URL.
  f.fetch.print
end