Changeset 233

Show
Ignore:
Timestamp:
02/26/08 12:28:22 (6 months ago)
Author:
stu
Message:

eliminate build noise from HTML::Document

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • rubygems/tarantula/trunk/lib/relevance/tarantula.rb

    r202 r233  
    22 22     def verbose 
    23 23       ENV["VERBOSE"] 
    24        end 
       24     end     
    25 25   end 
    26 26 end 
     
    31 31 require 'relevance/core_extensions/response' 
    32 32  
       33 require 'relevance/tarantula/recording' 
    33 34 require 'relevance/tarantula/response' 
    34 35 require 'relevance/tarantula/result' 
  • rubygems/tarantula/trunk/lib/relevance/tarantula/html_document_handler.rb

    r176 r233  
     5  5   def initialize(crawler) 
     6  6     @crawler = crawler 
        7   end               
        8   # HTML::Document shouts to stderr when it sees ugly HTML 
        9   # We don't want this -- the InvalidHtmlHandler will deal with it 
       10   def html_doc_without_stderr_noise(html)   
       11     body = nil 
       12     Recording.stderr do 
       13       body = HTML::Document.new html 
       14     end        
       15     body 
     7 16   end 
     8 17   def handle(result) 
     
    10 19     url = result.url 
    11 20     return unless response.html? 
    12        body = HTML::Document.new response.body 
       21     body = html_doc_without_stderr_noise(response.body) 
    13 22     body.find_all(:tag=>'a').each do |tag| 
    14 23       queue_link(tag['href'], url) 
  • rubygems/tarantula/trunk/test/relevance/tarantula/html_document_handler_test.rb

    r176 r233  
     5  5   before do 
     6  6     @handler = Relevance::Tarantula::HtmlDocumentHandler.new(nil) 
        7   end 
        8    
        9   it "does not write HTML Scanner warnings to the console" do 
       10     bad_html = "<html><div></form></html>"     
       11     err = Recording.stderr do 
       12       @handler.handle(Result.new(:response => stub(:html? => true, :body => bad_html))) 
       13     end 
       14     err.should == "" 
     7 15   end 
     8 16    
  • rubygems/tarantula/trunk/test/relevance/tarantula/invalid_html_handler_test.rb

    r176 r233  
     5  5   before do 
     6  6     @handler = Relevance::Tarantula::InvalidHtmlHandler.new 
        7   end 
        8      
        9   it "does not write HTML Scanner warnings to the console" do 
       10     bad_html = "<html><div></form></html>"     
       11     err = Recording.stderr do 
       12       @handler.handle(Result.new(:response => stub(:html? => true, :body => bad_html))) 
       13     end 
       14     err.should == "" 
     7 15   end 
     8 16