#!/usr/local/bin/perl
#
# Pick N moderately random hosts from a file that will respond to a "ping".
# N=1 if N is not given.
#
# Usage: $0 DB [N]
#
# (Install it in SATAN's main dir; it uses some of the SATAN libraries.)
#
# I've found this to be a tricky thing. Admittedly without giving this
# a significant amount of thought, but thinking about it a bit, I've come
# up with this methodology - it doesn't guarantee N hosts, but it should
# come close, in my experience. And they seem pretty darn random, which
# is even more important. The main problem is that all the hosts
# on the internet take up > 500MB of space, and I would like to be able
# to run this on my poor machine (which is cramped on disk and
# memory space already.) I'm sucking the hosts in from ftp.nw.com, BTW,
# so that's another potential problem - the hosts are 4-6 months old, and
# many aren't around anymore. However, I think that's essentially a
# random error, so I think I'm ok.
#
# So - take a hostfile; I assume that it's in "IP# hostname" format, one
# per line. I then start by calculating 20 times the number of hosts I want.
# I then generate that many distinct random #'s up to the limit of hosts
# in the file. Then I simply read through the file, and when I hit a
# number I've generated, I test that IP # with ping and see if it answers.
# I then simply run through the file until I either run out of file or
# I get the # of hosts I want.
#
# The one possibly significant problem with this is that I realize that
# the distribution is not really random. I chose 20 as an upper limit;
# however, if *most* files have at least N hosts that answer to ping,
# then I've skewed the distribution, because I'm cycling through the
# IP numbers from lower to upper, so IP #'s like x.y.z.1 will be much
# more likely than x.y.z.250.
# (It shouldn't be a big deal, however, 'cause
# it only makes a difference if you have a few subnets in the hostfile - as
# that number grows, this becomes less important, because it only affects
# the final subnet.)
#
# The only other potential problem I can think of is that the timeout is only
# set to be 5 seconds right now; if you have a hostfile that has hosts all
# over the place, then hosts that are significantly farther away might take
# a long time to answer the ping, and thus it would favor closer hosts. Tough ;-)
#
# It's trivial to solve if you have unlimited mem - just suck all the
# hosts into memory, and you can really do a real search. But the .com file
# is over 200MB itself, and that's just too impractical for the amount of time
# I'm willing to set my feeble brain at this problem.
#
# Output: the IP numbers that answered go to stdout, one per line; all
# progress and diagnostic chatter goes to stderr.
#

$running_under_satan = 1;

# assume we're in the satan directory...
require 'perl/get_host.pl';
require 'config/paths.pl';

#
# Ping timeouts in this many seconds...
#
$timeout = 5;

#
# Magic number: for every host wanted we pick $X candidate lines.
#
$X = 20;

$usage = "Usage: $0 DB [number-of-hosts-wanted]\n";
die $usage unless (@ARGV == 1 || @ARGV == 2);

#
# DB, formatted: IP <whitespace> hostname, one host per line.
#
$db = $ARGV[0];

if (@ARGV == 1) {
    warn "picking one host at random...\n";
    $N = 1;
}
else {
    $N = $ARGV[1];
    # A non-numeric or zero count would make the loops below meaningless.
    die $usage unless ($N =~ /^\d+$/ && $N > 0);
    warn "picking $N hosts at random from $db...\n";
}

# $PING is presumably set by config/paths.pl -- verify against that file.
# Without it the command list below would start with the IP number.
die "No ping program configured (\$PING is empty)\n"
    unless (defined($PING) && $PING =~ /\S/);

warn "Calculating the # of hosts... be patient, this could take awhile...\n";

# Count the lines ourselves rather than shelling out to wc(1), so that the
# file name never passes through a shell.
die "Can't open $db\n" unless open(DB, '<', $db);
$number_o_hosts = 0;
$number_o_hosts++ while (<DB>);
die "Can't rewind $db\n" unless seek(DB, 0, 0);

die "The DB ($db) has to have $X times the hosts that you're asking for...\n"
    unless $number_o_hosts > ($X * $N);

warn "Searching through $number_o_hosts hosts\n";

# get the ol' pseudo-random # generator working...
srand();

#
# how many hosts have we pinged so far?
#
$attempts = 0;

#
# Pick $X * $N distinct line numbers.  The size check above guarantees
# there are more lines than numbers wanted, so this loop terminates.
#
# print "\$n: $n < \$X: $X * \$N: $N\n";
$n = 0;
while ($n < ($X * $N)) {
    # Line numbers run from 1 to $number_o_hosts inclusive.
    $num = 1 + int(rand($number_o_hosts));
    # print "array: $num ";
    print STDERR ".";            # progress dot; warn() would append "at ... line N."
    next if defined($all_hosts{$num});
    $all_hosts{$num} = 1;
    # $n is the number of *potential* hosts that we found;
    # it ends up as $N * $X
    $n++;
}
warn "\n";

#
# $m is the number of hosts that we've found that answer to a ping,
# $N is the number of hosts we're looking for.
# $n is the number of *potential* hosts that we found; basically $N*$X
# $i is the current line in the DB we're examining.
#
$m = 0;
$i = 0;

#
# So, we search until we:
#
#   1) Find the requested number of hosts
#   2) Run out of hosts in the file to look for
#   3) Run out of random #'s (every chosen line has been tried)
#
while ($m < $N && %all_hosts && defined($next_line = <DB>)) {
    $i++;
    # delete() both tests and consumes the candidate, which is what lets
    # condition 3 above end the loop early.
    next unless delete $all_hosts{$i};

    # split(' ') ignores leading whitespace; only the first field is used.
    ($ip) = split(' ', $next_line);
    next unless (defined($ip) && length($ip));
    # Never hand ping something it would parse as an option.
    next if ($ip =~ /^-/);
    # print "IP#: $ip\n";

    # one more ping...
    $attempts++;
    print STDERR "pinging $ip... ";

    # List-form pipe open: the IP number comes from the data file and must
    # not be interpreted by a shell.  $PING is split on whitespace in case
    # it carries its own arguments.
    @ping_cmd = (split(' ', $PING), $ip, $timeout);
    die "Can't ping\n" unless open(PING, '-|', @ping_cmd);

    # Only the first line of ping's output decides the verdict.
    $reply = <PING>;
    close(PING);

    $error = 0;
    if (!defined($reply)) {
        warn "ping printed nothing for $ip\n";
        $error = 1;
    }
    elsif ($reply =~ /no answer/) {
        warn "IP $ip doesn't answer...\n";
        $error = 1;
    }
    elsif ($reply =~ /unknown host/) {
        warn "IP $ip is an unknown host...\n";
        $error = 1;
    }
    elsif ($reply =~ /ICMP Time Exceeded/) {
        warn "IP $ip ICMP time unreachable...\n";
        $error = 1;
    }
    elsif ($reply =~ /ICMP Host Unreachable/) {
        warn "IP $ip is ICMP unreachable...\n";
        $error = 1;
    }
    elsif ($reply =~ /ICMP Net Unreachabl/) {
        warn "IP $ip NET is ICMP unreachable...\n";
        $error = 1;
    }
    elsif ($reply =~ /is alive/) {
        warn "Cool! ==> $ip is alive (from $db)\n";
    }
    else {
        chomp($reply);
        warn "ping is crazy! ($reply)\n";
        $error = 1;
    }
    next if ($error);

    # $ip = &get_host_name($ip);
    $live_ones{$ip} = $ip;
    $m++;
    warn "Adding $ip to list of good hosts ($m/$N)\n";
}
close(DB);

# A hash in boolean context is true when it has at least one key;
# defined(%hash) is a compile-time error on modern perls.
if (%live_ones) {
    warn "The following hosts ($m/$N) answered to ping:\n";
    for $ip (keys %live_ones) {
        print "$ip\n";
    }
}
else {
    die "No hosts found!\n";
}

# finis