Changeset 284
- Timestamp:
- 03/21/08 10:22:36 (5 months ago)
- Files:
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
rubygems/tarantula/trunk/lib/relevance/tarantula/crawler.rb
r272 r284 22 22 @referrers = {} 23 23 @skip_uri_patterns =[ 24 /^javascript/, 24 25 /^mailto/, 25 26 /^http/, rubygems/tarantula/trunk/lib/relevance/tarantula/html_report_helper.rb
r273 r284 25 25 26 26 def textmate_url(file, line_no) 27 "txmt://open?url= #{File.expand_path(File.join(rails_root,file))}&line_no=#{line_no}" 27 "txmt://open?url=file://#{File.expand_path(File.join(rails_root,file))}&line_no=#{line_no}" 28 28 end 29 29 rubygems/tarantula/trunk/test/relevance/tarantula/crawler_test.rb
r272 r284 219 219 end 220 220 221 end 222 223 describe "Crawler link skipping" do 224 before do 225 @crawler = Crawler.new 226 end 227 221 228 it "skips links that are too long" do 222 crawler = Crawler.new 223 crawler.should_skip_link?("/foo").should == false 224 crawler.max_url_length = 2 225 crawler.expects(:log).with("Skipping long url /foo") 226 crawler.should_skip_link?("/foo").should == true 229 @crawler.should_skip_link?("/foo").should == false 230 @crawler.max_url_length = 2 231 @crawler.expects(:log).with("Skipping long url /foo") 232 @crawler.should_skip_link?("/foo").should == true 227 233 end 228 234 229 235 it "skips outbound links (those that begin with http)" do 230 crawler = Crawler.new 231 crawler.expects(:log).with("Skipping http-anything") 232 crawler.should_skip_link?("http-anything").should == true 236 @crawler.expects(:log).with("Skipping http-anything") 237 @crawler.should_skip_link?("http-anything").should == true 238 end 239 240 it "skips javascript links (those that begin with javascript)" do 241 @crawler.expects(:log).with("Skipping javascript-anything") 242 @crawler.should_skip_link?("javascript-anything").should == true 233 243 end 234 244 235 245 it "skips mailto links (those that begin with http)" do 236 crawler = Crawler.new 237 crawler.expects(:log).with("Skipping mailto-anything") 238 crawler.should_skip_link?("mailto-anything").should == true 246 @crawler.expects(:log).with("Skipping mailto-anything") 247 @crawler.should_skip_link?("mailto-anything").should == true 239 248 end 240 249 241 250 it 'skips blank links' do 242 crawler = Crawler.new 243 crawler.queue_link(nil) 244 crawler.links_to_crawl.should == [] 245 crawler.queue_link("") 246 crawler.links_to_crawl.should == [] 251 @crawler.queue_link(nil) 252 @crawler.links_to_crawl.should == [] 253 @crawler.queue_link("") 254 @crawler.links_to_crawl.should == [] 247 255 end 248 256 249 257 it "logs and skips links that match a pattern" do 250 crawler = Crawler.new 251 
crawler.expects(:log).with("Skipping /the-red-button") 252 crawler.skip_uri_patterns << /red-button/ 253 crawler.queue_link("/blue-button").should == "/blue-button" 254 crawler.queue_link("/the-red-button").should == nil 258 @crawler.expects(:log).with("Skipping /the-red-button") 259 @crawler.skip_uri_patterns << /red-button/ 260 @crawler.queue_link("/blue-button").should == "/blue-button" 261 @crawler.queue_link("/the-red-button").should == nil 255 262 end 256 263 257 264 it "logs and skips form submissions that match a pattern" do 258 crawler = Crawler.new 259 crawler.expects(:log).with("Skipping /reset-password-form") 260 crawler.skip_uri_patterns << /reset-password/ 265 @crawler.expects(:log).with("Skipping /reset-password-form") 266 @crawler.skip_uri_patterns << /reset-password/ 261 267 fs = stub_everything(:action => "/reset-password-form") 262 crawler.should_skip_form_submission?(fs).should == true 263 end 264 268 @crawler.should_skip_form_submission?(fs).should == true 269 end 265 270 end 266 271
