From: Adam Dickmeiss Date: Tue, 23 Jan 2001 11:26:43 +0000 (+0000) Subject: Added options for the robot. X-Git-Tag: ZMBOT.0.1~30 X-Git-Url: http://sru.miketaylor.org.uk/cgi-bin?a=commitdiff_plain;h=14639758cbc66407c40bde3d09894cfaff6da0e8;p=tclrobot.git Added options for the robot. --- diff --git a/robot.tcl b/robot.tcl index ddbfb82..bad9a25 100755 --- a/robot.tcl +++ b/robot.tcl @@ -1,5 +1,5 @@ #!/usr/bin/tclsh -# $Id: robot.tcl,v 1.10 2001/01/23 09:20:32 adam Exp $ +# $Id: robot.tcl,v 1.11 2001/01/23 11:26:43 adam Exp $ # proc RobotFileNext1 {area lead} { puts "RobotFileNext1 area=$area lead=$lead" @@ -260,6 +260,13 @@ proc RobotHref {url hrefx hostx pathx} { upvar $pathx path puts "Ref url = $url href=$href" + + if {[string first { } $href] >= 0} { + return 0 + } + if {[string first {?} $url] >= 0 && [string first {?} $href] >= 0} { + return 0 + } # get method (if any) if {![regexp {^([^/:]+):(.*)} $href x method hpath]} { set hpath $href @@ -729,9 +736,10 @@ if {![llength [info commands htmlSwitch]]} { set agent "zmbot/0.0" if {![catch {set os [exec uname -s -r]}]} { set agent "$agent ($os)" - puts "agent: $agent" } +puts "agent: $agent" + proc bgerror {m} { global errorInfo puts "BGERROR $m" @@ -739,29 +747,69 @@ proc bgerror {m} { } set robotsRunning 0 -set robotsMax 5 set robotSeq 0 set workdir [pwd] set idleTime 60000 -if {[llength $argv] < 2} { - puts "Tclrobot: usage " - puts " Example: 3 '*.indexdata.dk' http://www.indexdata.dk/" +set i 0 +set l [llength $argv] + +if {$l < 2} { + puts {tclrobot: usage [-j jobs] [-c count] [-d domain] [url ..]} + puts " Example: -c 3 -d '*.dk' http://www.indexdata.dk/" exit 1 } -set maxDistance [lindex $argv 0] -set domains [lindex $argv 1] -foreach href [lindex $argv 2] { - if {[RobotHref http://www.indexdata.dk/ href host path]} { - if {![RobotFileExist visited $host $path]} { - set outf [RobotFileOpen unvisited $host $path] - RobotWriteRecord $outf $href 0 - RobotFileClose $outf +while {$i < $l} { + set arg [lindex $argv $i] + switch -glob -- $arg { + -j* { + set robotsMax [string range $arg 2 end] + if {![string length $robotsMax]} { + set robotsMax [lindex $argv [incr i]] + } + } + -c* { + set maxDistance [string range $arg 2 end] + if {![string length $maxDistance]} { + set maxDistance [lindex $argv [incr i]] + } + } + -d* { + set dom [string range $arg 2 end] + if {![string length $dom]} { + set dom [lindex $argv [incr i]] + } + lappend domains $dom + } + default { + set href $arg + if {[RobotHref http://www.indexdata.dk/ href host path]} { + if {![RobotFileExist visited $host $path]} { + set outf [RobotFileOpen unvisited $host $path] + RobotWriteRecord $outf href 0 + RobotFileClose $outf + } + } } } + incr i } +if {![info exist domains]} { + set domains {*} +} +if {![info exist maxDistance]} { + set maxDistance 3 +} +if {![info exist robotsMax]} { + set robotsMax 5 +} + +puts "domains=$domains" +puts "max distance=$maxDistance" +puts "max jobs=$robotsMax" + RobotStart while {$robotsRunning} {