Changeset 233
- Timestamp:
- 02/26/08 12:28:22 (6 months ago)
- Files:
-
- rubygems/tarantula/trunk/lib/relevance/tarantula.rb (modified) (2 diffs)
- rubygems/tarantula/trunk/lib/relevance/tarantula/html_document_handler.rb (modified) (2 diffs)
- rubygems/tarantula/trunk/lib/relevance/tarantula/recording.rb (added)
- rubygems/tarantula/trunk/test/relevance/tarantula/html_document_handler_test.rb (modified) (1 diff)
- rubygems/tarantula/trunk/test/relevance/tarantula/invalid_html_handler_test.rb (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
rubygems/tarantula/trunk/lib/relevance/tarantula.rb
r202 r233 22 22 def verbose 23 23 ENV["VERBOSE"] 24 end 24 end 25 25 end 26 26 end … … 31 31 require 'relevance/core_extensions/response' 32 32 33 require 'relevance/tarantula/recording' 33 34 require 'relevance/tarantula/response' 34 35 require 'relevance/tarantula/result' rubygems/tarantula/trunk/lib/relevance/tarantula/html_document_handler.rb
r176 r233 5 5 def initialize(crawler) 6 6 @crawler = crawler 7 end 8 # HTML::Document shouts to stderr when it sees ugly HTML 9 # We don't want this -- the InvalidHtmlHandler will deal with it 10 def html_doc_without_stderr_noise(html) 11 body = nil 12 Recording.stderr do 13 body = HTML::Document.new html 14 end 15 body 7 16 end 8 17 def handle(result) … … 10 19 url = result.url 11 20 return unless response.html? 12 body = HTML::Document.new response.body 21 body = html_doc_without_stderr_noise(response.body) 13 22 body.find_all(:tag=>'a').each do |tag| 14 23 queue_link(tag['href'], url) rubygems/tarantula/trunk/test/relevance/tarantula/html_document_handler_test.rb
r176 r233 5 5 before do 6 6 @handler = Relevance::Tarantula::HtmlDocumentHandler.new(nil) 7 end 8 9 it "does not write HTML Scanner warnings to the console" do 10 bad_html = "<html><div></form></html>" 11 err = Recording.stderr do 12 @handler.handle(Result.new(:response => stub(:html? => true, :body => bad_html))) 13 end 14 err.should == "" 7 15 end 8 16 rubygems/tarantula/trunk/test/relevance/tarantula/invalid_html_handler_test.rb
r176 r233 5 5 before do 6 6 @handler = Relevance::Tarantula::InvalidHtmlHandler.new 7 end 8 9 it "does not write HTML Scanner warnings to the console" do 10 bad_html = "<html><div></form></html>" 11 err = Recording.stderr do 12 @handler.handle(Result.new(:response => stub(:html? => true, :body => bad_html))) 13 end 14 err.should == "" 7 15 end 8 16
