|
|
Jump to this file's LXR Page |
|
|
File: [CENS] / emstar / devel / sympathy_devel / sars.pl
(download)
/
(as text)
Revision: 1.34, Thu Jul 20 23:26:09 2006 UTC (3 years, 4 months ago) by nithya Branch: MAIN CVS Tags: pregeonet, PRE_TOSNIC_FIX, PRE_64BIT, HEAD, CYCLOPS_RELEASE_CANDIDATE_2_0, CYCLOPS_PRERELEASE_STABLE, CENTROUTE_EMSTAR_SOCKETS, AMARSS_JR_DEPLOYMENT_6_05_07 Changes since 1.33: +8 -1 lines *** empty log message *** |
#!/usr/bin/perl
#
# sars - Sympathy Automated Regression System
#
# Launches one or more EmStar simulation instances, injects faults at
# scheduled times and grades the failure reports read back from the sink node.
use strict;
# global variables, change this to accommodate your own environments
# %G holds run-time option flags; the only key used in this file is 'verbose'.
my(%G);
#default variable settings
# determines what is considered a "success". 3 is
# a correct root-cause and localization. 2 is just
# a correct root-cause, but not necessarily localization.
# 1 is some root-cause at the node.
my $SUCCESS = 3;
# number of metrics periods per failure epoch (overridable with -epoch)
my $epoch = 3;
# metrics period in seconds; this default is replaced further down by the
# METRICS_PERIOD_MSEC value grepped out of Sympathy.h
my $METRICS_PERIOD=30;
my $TRACK_FAIL_PERIOD = $METRICS_PERIOD * $epoch;
# seconds added to the randomized injection time of die/halt/send/recv/reboot
my $INITIAL_DELAY = 15 * 60;
# fallback location of the EmStar tree; $ENV{EMSTAR_HOME} is used instead
# during initialization when this path does not exist
my $EMSTAR_HOME="$ENV{HOME}/cvs/emstar_test/emstar";
my $JITTER_SCRIPT="$ENV{HOME}/bin/inject_jitter";
# $NODES is the highest node number iterated over (1..$NODES).
# @NODESKIP is indexed by node number; a true entry skips that node.
# NOTE(review): nothing in this file ever populates @NODESKIP.
my($SIM_FILE, $NODES, @NODESKIP);
$NODES = 30;
# The jitter script is required up front, even when -jitter is not requested.
if (!-e $JITTER_SCRIPT) {
print "Script to inject jitter ($JITTER_SCRIPT) not found\n";
exit 1;
}
# $SIM_FILE="./sympathy_sim.sim";
# Argument string handed to ./emrun/emsim (the epoch count is appended later).
$SIM_FILE="../devel/sympathy_devel/testtabs/sympathy_sim.sim";
my $SYMPATHY_DIR="$ENV{HOME}/.sympathy_3"; # where Sympathy regression files are located
# Replaces $SIM_FILE when -ceiling is given; note the embedded "--ceiling" flag.
my $SIM_CEILING_FILE=" --ceiling ../devel/sympathy_devel/sympathy_ceiling.sim"; #NR
my $SINKNODE='002'; # the heading '00' in '002' is VERY IMPORTANT
my $ITERATIONS=1;
# 1 => log directories get the 'sim' suffix, 0 (set by -ceiling) => 'emu'
my $SIMULATION = 1;
my $QUIT_AFTER_SUCC=1; # if we successfully found a fault/failure, then quit
# make STDOUT unbuffered so progress output appears immediately
select(STDOUT); $|=1;
use POSIX;
use Fcntl;
use FileHandle;
# ----------------- define the instance object --------------------
package Instance;
use vars qw($AUTOLOAD);

# Per-run state record. Every field below is reachable through an
# auto-generated accessor: $obj->field reads, $obj->field($value) writes.
my %vars = (
    log_dir          => undef,
    sim_group        => 'GROUP_UNDEF',
    iter             => 1,
    summ_fail_handle => '',
    #sigsegv_handle => '',
    s_iter           => 1,     # current iteration
    s_node           => undef,
    faults           => 0,
    failures         => 0,
    count            => 0,     # status
    previter         => -1,
    succ             => -1,    # highest success status
    quit_timer       => 30,
);

# Constructor: callable on the class name or on an existing object.
# Each new object starts from its own copy of the defaults in %vars.
sub new {
    my ($proto) = @_;
    my $class  = ref($proto) || $proto;
    my %fields = %vars;
    return bless \%fields, $class;
}

# Returns the names of all fields currently stored in the object.
sub keys {
    my ($self) = @_;
    return CORE::keys(%{$self});
}

# Generic accessor. With an argument the named field is set (and created if
# it did not exist) and the new value returned; without one the field is
# read. An unknown field is reported and terminates the program with exit
# status 1. DESTROY is answered quietly.
sub AUTOLOAD {
    my $self = shift @_;
    ref($self) || die "$self is not an object";
    (my $field = $AUTOLOAD) =~ s/.*://;

    return $self->{$field} = shift @_ if @_;
    return $self->{$field}            if exists $self->{$field};
    return undef                      if $field eq 'DESTROY';

    print "ERROR: entry object->$field() not found\n";
    exit 1;
    #return undef;
}
# -----------------------------------------------------------------
package main;
my @INSTANCES;           # Instance objects of the current iteration
my @FILEHANDLES;         # pipes opened by openAsynchCommand()
my %BUFFER;              # per-handle text read so far but not yet consumed
my $INSTANCES=1;         # default num of instances run
my %MESSAGES;            # mapping between STF_OK and "OK"
my %NODE;                # success/fail info (SRC_NO_DATA, SRC_NODE_FAILED, etc)
my %HANDLE2FILE;         # associate handle with command
my $LOG_SUFFIX='';       # extra information
my $EXIT_TIME=60*60*2;   # kill EVERYTHING in 2 hours for safety
my $STDOUT_NO_CR=0;      # for pretty print purpose
my $BZIP2=0;
# exception handling: clean up the child processes before exiting.
# SIGSTOP cannot be caught, so installing a handler for it had no effect;
# SIGTERM (what a plain `kill` sends) is handled instead.
$SIG{INT}  = \&handler;
$SIG{HUP}  = \&handler;
$SIG{TERM} = \&handler;
my @PID;                 # pids of every background process started so far
# Terminates every recorded background process (polite kill first, kill -9
# ten seconds later) and then forgets all per-iteration bookkeeping.
sub killProcesses {
    if (@PID) {
        my $pid_list = join(' ', @PID);
        printw("Cleaning processes ", join(', ', @PID), "...");
        # Do the following instead of Perl's kill because Perl's kill hangs
        system("(kill $pid_list > /dev/null 2>&1) &");
        system("(sleep 10; kill -9 $pid_list > /dev/null 2>&1) &");
        printw("done\n");
    }
    #foreach my $handle (@FILEHANDLES) {close($handle);}
    %HANDLE2FILE = ();
    %BUFFER      = ();
    @INSTANCES   = ();
    @FILEHANDLES = ();
    @PID         = ();
}
# Signal handler: run the normal cleanup, then immediately kill -9 the same
# processes in the background and leave with exit status 1.
sub handler {
    # Snapshot first: killProcesses() empties @PID.
    my @victims = @PID;
    killProcesses();
    my $pid_list = join(' ', @victims);
    system("(kill -9 $pid_list > /dev/null 2>&1) &");
    exit 1;
}
# True (1) when the text buffered for $handle contains at least one newline.
sub hasLine {
    my $pending = $BUFFER{ $_[0] };
    return $pending =~ /\n/ ? 1 : '';
}
# Removes and returns the first non-empty line (including its newline) from
# the text buffered for $handle. Leading empty lines are discarded. Returns
# an empty string when no complete line is buffered.
sub getLine {
    my ($handle) = @_;
    # Get rid of new-lines!
    $BUFFER{$handle} =~ s/(^\n)(.*)/$2/ while $BUFFER{$handle} =~ /^\n/;
    return $1 if $BUFFER{$handle} =~ s/^([^\n]+\n)(.*)/$2/;
    return '';
}
# Drains whatever is currently readable from every registered handle into
# %BUFFER. For each handle at most 100 reads of up to 1024 bytes are done per
# call; reading stops early at end-of-file (the handle is then closed) or as
# soon as sysread reports no data / an error.
sub processFileHandles {
    #fcntl($handle, F_SETFL(), O_NONBLOCK());
    my $chunk;
    for my $fh (@FILEHANDLES) {
        # safety for switching tasks: bounded number of reads per handle
        for (my $tries = 100; $tries > 0; $tries--) {
            # IMPORTANT, below is a non-blocking call
            my $got = sysread($fh, $chunk, 1024);

            # undef covers both "nothing to read right now" (EAGAIN) and
            # real errors; either way we move on to the next handle.
            last unless defined $got;

            if ($got == 0) {
                # Remote socket closed connection
                warn "Handle closed!\n";
                close($fh);
                last;
            }
            $BUFFER{$fh} .= $chunk;
        }
    }
}
# Starts $command through the shell with stderr merged into stdout, registers
# the resulting pipe in @FILEHANDLES / %HANDLE2FILE, switches it to
# non-blocking mode and returns the handle. Dies if the command cannot be
# started or the flags cannot be set.
sub openAsynchCommand {
    my ($command) = @_;
    my $pipe = FileHandle->new;
    $HANDLE2FILE{$pipe} = $command;
    push(@FILEHANDLES, $pipe);
    #open($handle, "$ENV{HOME}/bin/loop 2>&1 |");
    open($pipe, "$command 2>&1 |") or die "Can't execute $command";
    my $flags = O_NONBLOCK | O_RDWR | O_NDELAY;
    fcntl($pipe, F_SETFL, $flags)
        or die "Couldn't set flags for HANDLE: $!\n";
    return $pipe;
}
# print() wrapper used for all regular output. In verbose mode the main loop
# leaves a row of progress dots without a trailing newline; when that is the
# case a newline is emitted first so the message starts on a fresh line.
sub printw {
    my $finish_dot_row = $G{verbose} && $STDOUT_NO_CR;
    if ($finish_dot_row) {
        $STDOUT_NO_CR = 0;
        print "\n";
    }
    print @_;
}
# We open and then close the log on every call to make sure all entries are
# flushed to disk. Slightly inefficient but simple.
#
# logw($obj, $line)
#   $obj  - the Instance the entry belongs to (not used here)
#   $line - text to record; trailing whitespace is stripped
# Appends the line to $ENV{LOG_DIR}/sars.log and echoes it to the terminal
# prefixed with "> ". Dies if the log file cannot be opened or written.
sub logw {
    my ($obj, $line) = @_;
    $line =~ s/\s+$//;    # strip trailing space
    my $file = "$ENV{LOG_DIR}/sars.log";
    # Three-argument open with a lexical handle: the path can never be
    # misread as an open mode and no global handle is left behind.
    open(my $log, '>>', $file) or die "Can't write to $file: $!";
    print {$log} $line . "\n" or die "Can't write to $file: $!";
    # Buffered write errors only surface at close; report them, but do not
    # abort the whole run over a single log line.
    close($log) or warn "Problem closing $file: $!\n";
    printw("> $line\n");
}
# processSummFail($obj, $timepassed)
#   $obj        - Instance whose summ_fail_handle is being monitored
#   $timepassed - seconds since the iteration started (only logged)
#
# Consumes every complete line buffered for the instance's summary handle.
# Header lines ("Node <n> ... Metric Pd: <p>") update the current node and
# sink iteration; every reported failure is translated through %MESSAGES,
# graded against the expectations in %NODE and written to the log via logw().
# Always returns 1 (also when no handle has been opened yet).
sub processSummFail {
my($obj, $timepassed)=@_;
my $handle=$obj->summ_fail_handle;
my($s_node, $s_iter);
# monitoring has not been started yet: nothing to do
if (!$handle) {return 1}
while (hasLine($handle)) {
my $line=getLine($handle);
if ($line=~/\*\*\*\*\*\*\*\*/) {
# separator line made of asterisks
next;
} elsif ($line=~/Node\s+(\d+).*Metric Pd:\s+(\d+)/) {
# header line: remember which node / metric period the following
# status lines belong to
$s_node=$1; $s_iter=$2;
if ($s_iter != $obj->s_iter) {
# a new sink iteration started: reset the per-iteration counters
$obj->s_iter($s_iter);
$obj->count(0);
$obj->failures(0);
$obj->faults(0);
if ($G{verbose} && $s_iter!=$obj->previter) {
printw("Group:".$obj->sim_group." time:$timepassed sink iteration:$s_iter\n");
$obj->previter($s_iter);
}
}
$obj->s_node($s_node);
$obj->count($obj->count+1); # see if we're done with messages
} else {
# any other line is treated as a comma-separated list of status reports
foreach my $status (split(/,/,$line)) {
$status=~s/^\s+//;
$status=~s/\s+$//;
# $1 = text before "Failure" (category), $3 = root-cause text up to
# the last "("
if ($status=~/^(.*)\s*(Failure).*Root-Cause: (.+)\(/) {
my $cat=$1;
my $ff=$2;
my $msg=$3;
$msg=~s/\s+$//;
print "now CHECK msg now: $msg\n";
# translate the human-readable text into its identifier; the result
# is undefined when the text is not a key of %MESSAGES
$msg=$MESSAGES{$msg};
print "2 now CHECK msg now: $msg\n";
my $node=$obj->s_node;
print "CHECK node: $node\n";
print "CHECK line: $status\ncomp of failure is: $cat, failure: $msg\n";
print "CHECK cat: $cat\n";
# $mp is only used by the debug print on the next line
my $mp = $NODE{$node."_$msg"};
print "CHECK node_msg: $mp\n";
if (!($msg eq 'SRC_OK')) {
$obj->failures($obj->failures+1);
# Grade the report:
#   3  expected fault at this node, category contains "Root"
#   2  expected fault at this node, any other category
#   1  some fault was expected at this node, but not this one
#   0  nothing was expected at this node
#  -1  unexpected report whose identifier contains NODE_FAILED
my $success=0;
my $success_code = '2';
if ($cat=~/Root/) { $success_code = '3'; }
# if ($cat=~/Node/i) {
$success=($NODE{$node."_$msg"}?$success_code:
$NODE{$node}?'1':'0');
#}
# FALSE report! Wrong node!
if ($success==0 && $msg=~/NODE_FAILED/) {$success=-1}
# record the highest succ level
if ($obj->succ < $success) {$obj->succ($success);}
#iter time s_iter node category type correctness
logw($obj,
sprintf("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s",
$obj->iter, $timepassed,
$obj->s_iter, $node,
$cat, $ff,
$msg,
$success));
}
}
}
}
}
return 1;
}
# Builds the device-directory name of a node: "node" followed by the node
# number printed with at least three digits (5 -> "node005").
sub genNode {
    my $number = shift @_;
    my $name   = sprintf("node%.3d", $number);
    return $name;
}
# launch($cmd, $logfile)
# Starts $cmd in the background through the shell with its output redirected
# into $ENV{LOG_DIR}. When no $logfile is given, a hidden file named after
# the last '/'-separated component of the command ("._<name>") is used.
# The pid reported by the shell is appended to @PID and returned.
sub launch {
    my ($cmd, $logfile) = @_;
    my $target = $logfile;
    if (!$target) {
        my @parts = split(/\//, $cmd);
        $target = "._" . $parts[$#parts];    # make it hidden
    }
    $cmd .= " > $ENV{LOG_DIR}/$target";
    # EXECUTE below
    my $pid = `$cmd 2>&1 & wmpid=\$!; echo \$wmpid`;
    chomp($pid);
    push(@PID, $pid);
    printw("> Launching command: $cmd ($pid)\n");
    return $pid;
}
# Starts the background loggers that capture the sink node's output into
# $ENV{LOG_DIR} and schedules a background "kill -9" for all of them
# $EXIT_TIME + 5 seconds from now. Takes no arguments; the simulation group
# is read from $ENV{SIM_GROUP}.
sub log_script {
    my $sinknode = genNode($SINKNODE);
    my $bin      = "$EMSTAR_HOME/obj.i686-linux/bin";
    my $linkdump = "$bin/linkdump";
    my $echocat  = "$bin/echocat";
    my $simdev   = "/dev/sim/group$ENV{SIM_GROUP}/$sinknode";

    # The /dev paths are derived from $sinknode so they follow $SINKNODE,
    # like the summary reader in processCommand(); they used to be
    # hard-coded to node002.
    # NOTE(review): "-N 2" and the log names log2/log_error2/link2 presumably
    # also refer to sink node 2 - confirm before changing $SINKNODE.
    my @pids = (
        launch("cat $simdev/emlog/all/all-f",      "log2"),
        launch("cat $simdev/tos/logs/error-f",     "log_error2"),
        launch("$linkdump -f -l -N 2 -G $ENV{SIM_GROUP} -r -U mote0 -T", "link2"),
        launch("$echocat -w /dev/$sinknode/sympathy/metrics", "all_metrics"),
        launch("$echocat -w /dev/emrun/last_msg",             "last_message"),
        launch("$echocat -w /dev/$sinknode/sympathy/summary", "all_summ_fail"),
    );
    #foreach my $i (1..$NODES) {
    # if ($i==int($SINKNODE) || $NODESKIP[$i]) {next;}
    # my $node=genNode($i);
    # push(@pids, launch("cat /dev/sim/group$ENV{SIM_GROUP}/$node/tos/logs/usr3,usr1-f", "log$i"));
    # }

    # Safety net: kill the loggers shortly after the planned end of the run.
    my $pids = join(' ', @pids);
    my $time = $EXIT_TIME + 5;
    my $killpid = `(sleep $time; kill -9 $pids) > /dev/null 2>&1 & wmpid=\$!; echo \$wmpid`;
    chomp($killpid);
    push(@PID, $killpid);
    printw("After $time seconds, kill $pids\n");
}
# Central place to put in actions
#
# processCommand($obj, $cmd, $timepassed)
#   $obj        - Instance whose simulation group receives the action
#   $cmd        - queue entry of the form "<time> <command> [<argument>]"
#   $timepassed - seconds since the iteration started (only logged)
# Logs the action, then carries it out. Returns the string 'exit' for the
# exit command and nothing for every other command.
sub processCommand {
    my ($obj, $entry, $timepassed) = @_;
    my ($time, $cmd, $action) = split(/\s+/, $entry);
    $ENV{SIM_GROUP} = $obj->sim_group;
    my $group_dev = "/dev/sim/group$ENV{SIM_GROUP}";

    #NR Log injection of failure
    logw($obj,
         sprintf("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s",
                 $obj->iter, $timepassed,
                 $obj->s_iter, $action,
                 "Inject", "Command", "$cmd".($action?",$action":""), '0'));
    if ($G{verbose}) {
        printw("*Time:$time executing:$cmd $action (group $ENV{SIM_GROUP})\n");
    }

    if ($cmd eq 'exit') {
        return 'exit';
    } elsif ($cmd eq 'reboot') {
        # write into /dev/.../fault_inject
        my $node = genNode($action);    # $action is the node number here
        system("echo reboot > $group_dev/$node/sympathy/fault_inject");
        if ($?) {
            printw("Problem injecting 'reboot' into $group_dev/$node/sympathy/fault_inject\n");
        }
    } elsif ($cmd eq 'traffic') {
        $action *= 10;    # Convert sec to 100ms units
        my $shell = "echo 'id=1:sensors=33:period=$action' > $group_dev/node$SINKNODE/dse/query";
        printw("Injecting traffic with command:\n$shell\n");
        system($shell);
        if ($?) { printw("Problem injecting 'traffic' \n"); }
    } elsif ($cmd eq 'jitter') {
        my $shell = "$JITTER_SCRIPT";
        printw("Injecting jitter with command:\n$shell\n");
        system($shell);
        if ($?) { printw("Problem injecting 'jitter' \n"); }
    } elsif ($cmd eq 'die') {
        # write into /dev/.../fault_inject
        my $node = genNode($action);
        system("echo die > $group_dev/$node/sympathy/fault_inject");
        if ($?) {
            printw("Problem injecting 'die' into $group_dev/$node/sympathy/fault_inject\n");
        }
    } elsif ($cmd eq 'halt') {
        my $node = genNode($action);
        print "GOING TO HALT NODE!\n";
        system("echo halt > $group_dev/$node/emrun/command");
        if ($?) { printw("Problem injecting 'halt' into $node\n"); }
    } elsif ($cmd eq 'send' || $cmd eq 'recv') {
        # $action is "<node>,<prob>". The node name is built from the node
        # number alone instead of relying on numeric coercion of the whole
        # "<node>,<prob>" string.
        my ($nodenum, $prob) = split(/,/, $action);
        my $node = genNode($nodenum);
        # NOTE(review): the node= field carries the full node name
        # (e.g. node003), as before - confirm that fault_inject expects that.
        system("echo ${cmd}:node=${node}:prob=${prob} > $group_dev/$node/sympathy/fault_inject");
        if ($?) {
            printw("Problem injecting '$cmd' into $group_dev/$node/sympathy/fault_inject\n");
        }
    } elsif ($cmd eq 'monitor') {
        # start reading summ_fail
        if ($obj->summ_fail_handle) {
            warn "Already parsing summ_fail! Command ignored\n";
            return;
        }
        my $sinknode = genNode($SINKNODE);
        log_script();
        my $handler = openAsynchCommand("$EMSTAR_HOME/obj.i686-linux/bin/echocat -w /dev/$sinknode/sympathy/summary");
        $obj->summ_fail_handle($handler);
        #my $log_dir=$obj->log_dir;
        #my $handler2 = openAsynchCommand("tail -f $log_dir/emrun.log | grep SIG");
        #$obj->sigsegv_handle($handler2);
        # echo non-sense into nodes (try to kick them start)
        printw("Checking if all the nodes are up and running...\n");
        foreach my $i (1..$NODES) {
            if ($i == int($SINKNODE) || $NODESKIP[$i]) { next; }
            my $node = genNode($i);
            system("echo WAKE > $group_dev/$node/sympathy/fault_inject");
            if ($?) { printw("\tWARNING: Node $i is not ready\n"); }
        }
    } else {
        warn "Command ($cmd, $action) not understood\n";
    }
    return;
}
# ------------------ program initialization --------------------
# Scratch directory, sanity checks on the environment, and the real metrics
# period taken from the Sympathy sources.
system("mkdir -p /tmp/$ENV{USER}");
if (!$ENV{HOME}) {die "You need to setup the HOME environment\n"; }
if (!-e $SYMPATHY_DIR) {system("mkdir -p $SYMPATHY_DIR");}
if (!-e $EMSTAR_HOME) {
    # default tree not present: fall back to the EMSTAR_HOME environment variable
    if (!$ENV{EMSTAR_HOME}) {die "Can't find $EMSTAR_HOME, please set EMSTAR_HOME\n";}
    if (!-e $ENV{EMSTAR_HOME}) {die "Can't find $ENV{EMSTAR_HOME}, please re-set EMSTAR_HOME\n"; }
    $EMSTAR_HOME = $ENV{EMSTAR_HOME};
}
my $line=`grep METRICS_PERIOD $EMSTAR_HOME/tos-contrib/sympathy/tos/lib/Sympathy.h` || die "Can't open $EMSTAR_HOME/tos-contrib/sympathy/tos/lib/Sympathy.h";
# the header gives the period in milliseconds; the script works in seconds
if ($line=~/METRICS_PERIOD_MSEC\s+(\d+)/) {$METRICS_PERIOD=$1/1000;} else {die "Can't find METRICS_PERIOD in Sympathy.h";}
$TRACK_FAIL_PERIOD = $METRICS_PERIOD * $epoch;
print "CHECK metrics-period: $METRICS_PERIOD, track-fail: $TRACK_FAIL_PERIOD\n";
# process arguments
# No arguments at all: print the usage guide and stop. The option formats
# below mirror what the argument parser actually accepts: die/halt/reboot/
# send/recv take no time (it is randomized), traffic takes <sec>:<period>.
if ($#ARGV==-1) {
    printw(<<EOL);
Sympathy Automated Regression System (sars) Usage Guide:
> sars.pl [options below]
-sim_group <num> EmStar simulation group to run in (or set SIM_GROUP)
-instances <num> Number of EmStars to run simultaneously. [default=1]
-iter <num> Number of times to run/generate data [default=1]
-ceiling Run with ceiling cfg file:\n\t$SIM_CEILING_FILE
-monitor <sec> Start reading /dev/*/files after <sec>
-die <node_num> After random time, node <node_num> radio dies
-halt <node_num> After random time, node <node_num> *HALTS* completely
-reboot <node_num> After random time, reboot node <node_num>
-send node=<n>,prob=<p> After random time, inject a 'send' fault (prob <p>) at node <n>
-recv node=<n>,prob=<p> After random time, inject a 'recv' fault (prob <p>) at node <n>
-jitter <sec1> After <sec1>, disable jitter on nodes (to force congestion on
Sympathy traffic
-epoch <num> #metrics-pd per failure-epoch
-traffic <sec>:<sec2> After <sec>, inject dse-traffic w/ period sec2
-keepgoing If this is specified, simulation will keep running
until time is up. If this is not specified, then
simulation will quit 30 seconds after it has
successfully detected fault/failure.
-exit <sec> After <sec> exit. By default exit after @{[ $METRICS_PERIOD*10 ]} seconds
-verbose Print time
-kill Kill previous runs
-bzip2 Use bzip2 to compress logs
Example: sars.pl -sim_group 95 -jitter 150 -monitor 151 -die 7 -exit 2000 -verbose
Example: sars.pl -sim_group 95 -jitter 150 -monitor 151 -send node=3,prob=70 -recv \\
node=3,prob=70 -exit 2000 -verbose
EOL
    exit 0;
}
# parse Arguments
# Every option starts with '-'. Timed actions are appended to @COMMAND_QUEUE
# as "<sec> <command> [<argument>]"; fault injections additionally record in
# %NODE what the sink is expected to report, and extend $LOG_SUFFIX so the
# log directory name describes the run.
my @COMMAND_QUEUE;
my $_prevsec=0;
for (my $i=0; $i<=$#ARGV; $i++) {
    my $arg=$ARGV[$i];
    if ($arg!~s/^\-//) {die "Argument $arg not recognized";}
    if ($arg eq 'kill') {
        system("ps auxwww|grep $ENV{USER}|egrep '(emrun|emsim|sympathy_sink|sympathy_app|wmpid)' | awk '{print \$2}'|xargs kill -9");
        printw("All Sympathy processes should be killed\n");
        exit 0;
    } elsif ($arg eq 'bzip2') {
        $BZIP2=1;
    } elsif ($arg eq 'sim_group') {
        my $sim_group=$ARGV[++$i];
        if ($sim_group!~/^\d+$/) {die "SIM_GROUP must be between 0-99";}
        $ENV{SIM_GROUP}=$sim_group;
    } elsif ($arg eq 'iter') {
        $ITERATIONS=$ARGV[++$i];
    } elsif ($arg eq 'instances') {
        $INSTANCES=$ARGV[++$i];
    } elsif ($arg eq 'keepgoing') {
        $QUIT_AFTER_SUCC=0;
    } elsif ($arg eq 'epoch') {
        $epoch =$ARGV[++$i];
        #Re-calculate the track-fail-period!
        $TRACK_FAIL_PERIOD = $METRICS_PERIOD * $epoch;
    } elsif ($arg eq 'ceiling') {
        $SIM_FILE = $SIM_CEILING_FILE;
        $SIMULATION = 0;
        printw("New sim-file: $SIM_FILE\n");
    } elsif ($arg =~ /^(?:jitter|traffic|monitor|die|halt|recv|send|reboot|exit)$/) {
        # Anchored on purpose: a misspelt option such as -exits must be
        # rejected below instead of silently swallowing the next argument.
        my $cmd=$arg;
        my($sec,$param)=split(':',$ARGV[++$i]);
        #Randomize time failure is injected!
        if ($cmd =~ /^(?:die|halt|recv|send|reboot)$/) {
            # these take only an argument; the time is chosen here
            $param = $sec;
            # rand within stats period cuz we check every stats pd for failure
            $sec = int(rand($METRICS_PERIOD));
            #This is done to get rid of fact that sympathy waits for
            # epoch*stats-pd in the beginning. And that the network
            # takes some time to form.
            print "sec was $sec\n";
            #$sec += (3 * $TRACK_FAIL_PERIOD);
            $sec += $INITIAL_DELAY;
            print "sec is $sec\n";
        }
        $_prevsec = $sec;
        if ($cmd eq 'monitor') {
            push(@COMMAND_QUEUE, "$sec $cmd");
        } elsif ($cmd eq 'exit') {
            push(@COMMAND_QUEUE, "$sec $cmd");
            $EXIT_TIME=$sec;
        } elsif ($cmd eq 'traffic') {
            # param is the period
            if ($param!~/^\d+/) {
                printw("$cmd: Second argument period ($param) must be a number\n");
                exit 1;
            }
            push(@COMMAND_QUEUE, "$sec $cmd $param");
            $LOG_SUFFIX.="$sec$cmd$param.";
        } elsif ($cmd eq 'jitter') {
            push(@COMMAND_QUEUE, "$sec $cmd");
            $LOG_SUFFIX.="$sec$cmd.";
        } elsif ($cmd eq 'die' || $cmd eq 'halt') {
            # param is node number
            if ($param!~/^\d+/) {
                printw("Second argument node ($param) must be a number\n");
                exit 1;
            }
            push(@COMMAND_QUEUE, "$sec $cmd $param");
            $NODE{$param."_SRC_NODE_FAILED"}=1;
            $NODE{$param}=1;
            my $tmp = $NODE{$param."_SRC_NODE_FAILED"};
            print "for arg: $param, msg: _SRC_NODE_FAILED: $tmp\n";
            $LOG_SUFFIX.="$sec$cmd$param.";
        } elsif ($cmd eq 'send' || $cmd eq 'recv') {
            my($node,$prob);
            if ($param=~/node=(\d+),prob=(\d+)/i) {
                $node=$1;
                $prob=$2;
                push(@COMMAND_QUEUE, "$sec $cmd $node,$prob");
            } else {
                # the time is randomized above, so no "<sec>:" prefix is accepted
                die "Sorry, send/recv accepts in the format of -send node=5,prob=80";
            }
            $NODE{$node."_SRC_INSUFFICIENT_DATA"}=1;
            $NODE{$node."_SRC_NODE_FAILED"}=1;
            # look the expectation up under the node number it was stored with
            my $tmp = $NODE{$node."_SRC_NODE_FAILED"};
            print "for arg: $param, msg: _SRC_NODE_FAILED: $tmp\n";
            $NODE{$node}=1;
            $LOG_SUFFIX.="$sec$cmd$param.";
        } elsif ($cmd eq 'reboot') {
            # param is node number
            if ($param!~/^\d+/) {
                printw("Second argument node ($param) must be a number\n");
                exit 1;
            }
            push(@COMMAND_QUEUE, "$sec $cmd $param");
            # setup the node failure
            $NODE{$param."_SRC_NODE_REBOOTED"}=1;
            $NODE{$param}=1;
            $LOG_SUFFIX.="$sec$cmd$param.";
        }
    } elsif ($arg eq 'verbose') {
        $G{verbose}=1;
    } else {
        warn "Argument '$arg' not understood.";
        exit 1;
    }
}
# Finish the configuration derived from the arguments, show the user what is
# about to happen, and load the failure-text -> identifier table.
$SIM_FILE.=" $epoch";
$LOG_SUFFIX.="epoch$epoch.";
# put in an exit command if the last one isn't already exit
if (!@COMMAND_QUEUE || $COMMAND_QUEUE[-1] !~ /exit/) {
    push(@COMMAND_QUEUE, ($METRICS_PERIOD*10)." exit");
    $EXIT_TIME=($METRICS_PERIOD*10);
}
if ($EXIT_TIME==0) {die "Exit time not specified!\n";}
# Test for "not set" rather than for falseness: group 0 is inside the
# advertised 0-99 range and must not be rejected.
if (!defined($ENV{SIM_GROUP}) || $ENV{SIM_GROUP} eq '') {
    die "You need to set SIM_GROUP (or specify -sim_group between 0 to 99)";
}
printw(("="x75)."\n");
printw("Command review:\n");
foreach my $cmd (@COMMAND_QUEUE) {
    printw("\t$cmd\n");
}
# %NODE keys are either "<node>_<fault id>" or a bare "<node>"
foreach my $node (sort (keys %NODE)) {
    if ($node =~ s/^(\d+)_//) {
        printw("Expecting fault $node for node $1\n");
    } else {
        printw("Expecting 'something' for node $node\n");
    }
}
# Get a text to ID translation
# Built from sympathy_decode.c: every "<ID>) return "<text>"" line yields
# $MESSAGES{<text>} = <ID>.
$MESSAGES{"Unrecognized Fault"}="UNRECOGNIZED";
{
    my $decode_src = "$EMSTAR_HOME/devel/sympathy/libsympathy/sympathy_decode.c";
    open(my $decode_fh, '<', $decode_src) ||
        die "Can't find $decode_src";
    while (my $line=<$decode_fh>) {
        if ($line=~/(SR._\w+)\s*\)\s+return\s+\"([^\"]+)\"/) {
            $MESSAGES{$2}=$1;
        }
    }
    # the handle used to stay open for the rest of the run
    close($decode_fh);
}
# Start running the test here!
# For every iteration: start one emsim per simulation group, then poll once
# per second - executing queued commands whose time has come and parsing the
# sink's failure summary - until an exit command fires or every instance has
# reported success for long enough. Afterwards packet statistics are printed
# and all background processes are killed.
my @_CMD=@COMMAND_QUEUE;
# consecutive simulation groups, one per requested instance
my($_from, $_to)=($ENV{SIM_GROUP}, $ENV{SIM_GROUP}+$INSTANCES-1);
foreach my $iter (1..$ITERATIONS) {
# each iteration replays the full command queue
@COMMAND_QUEUE=@_CMD;
printw "\n";
printw "Running iteration $iter\n";
# execute all the emrun command/instances asynchronously
foreach my $group ($_from..$_to) {
$ENV{SIM_GROUP}=$group;
my $type=$SIMULATION?'sim':'emu';
# NOTE(review): $suffix is never used; the directory name below is built
# from $LOG_SUFFIX directly.
my $suffix=($LOG_SUFFIX?$LOG_SUFFIX.'.':'');
# pick the first letter a..z for which the log directory does not exist yet;
# if all 26 exist, the 'z' directory is reused (see warning below)
foreach my $count ('a'..'z') {
if (!-e ($ENV{LOG_DIR}="$SYMPATHY_DIR/group$ENV{SIM_GROUP}.${LOG_SUFFIX}iter$iter$count.$type")) {
last;
}
}
if (-e $ENV{LOG_DIR}) {
print "WARNING: Directory $ENV{LOG_DIR} already exists! Over-writing...\n";
}
system("mkdir -p $ENV{LOG_DIR}");
unlink("$ENV{LOG_DIR}/sars.log");
printw "Log dir: $ENV{LOG_DIR}\n";
# NOTE(review): $origdir is not used anywhere below.
my $origdir=`pwd`; chomp($origdir);
# the script must be started from a directory that contains ./emrun/emsim
if (!-e "./emrun/emsim") {die "Can't find ./emrun/emsim";}
my $empid=launch("./emrun/emsim $SIM_FILE", "emrun.log");
# safety net: a background subshell kills emsim shortly after the exit time
my $time=$EXIT_TIME+5;
my $pid2=`(sleep $time; kill -9 $empid) > /dev/null 2>&1 & wmpid=\$!; echo \$wmpid`; chomp($pid2);
# launch() has already recorded $empid in @PID, so it ends up listed twice
push(@PID, $empid, $pid2);
my $obj = Instance->new();
$obj->log_dir($ENV{LOG_DIR});
$obj->sim_group($group);
$obj->iter($iter);
push(@INSTANCES, $obj);
logw($obj, sprintf("#h Run-Iter Timepassed TestIter Node-id Component Type Failure Correct"));
logw($obj, "# ($ENV{LOG_DIR}) ".`date`);
}
# loop here
my $starttime=time;
my $repeat=1;
while ($repeat) {
# pull whatever the monitored commands have printed into %BUFFER
processFileHandles();
my $timepassed=time-$starttime;
if ($G{verbose}) {
# progress display: a dot per pass, plus the minute count when the
# elapsed time is a multiple of 60 seconds
if ($timepassed%60==0) {print(sprintf("%d min",int($timepassed/60)));}
$STDOUT_NO_CR=1;
print ".";
}
# process commands after time has passed
# Only the head of the queue is examined, so commands run in queue order.
while ($#COMMAND_QUEUE!=-1 &&
(split(/\s+/, $COMMAND_QUEUE[0]))[0] <= $timepassed) {
my $cmd=shift(@COMMAND_QUEUE);
# every command is applied to every running instance
foreach my $obj (@INSTANCES) {
$ENV{LOG_DIR}=$obj->log_dir;
$ENV{SIM_GROUP}=$obj->sim_group;
if (processCommand($obj, $cmd, $timepassed) eq 'exit') {
$repeat=0;
}
}
}
# parse the lines
# $quits counts the instances that have NOT yet finished their quit timer
my $quits=$#INSTANCES+1;
foreach my $obj (@INSTANCES) {
$ENV{LOG_DIR}=$obj->log_dir;
$ENV{SIM_GROUP}=$obj->sim_group;
# NOTE(review): processSummFail() in this file always returns 1, so this
# error branch is currently never taken.
if (!processSummFail($obj, $timepassed)) {
printw "FATAL ERROR DETECTED, resetting...\n";
$repeat=0;
last;
}
if ($QUIT_AFTER_SUCC) {
# once the required success level is reached, count the quit timer
# down by one per pass and mark the log directory with a SUCC file
if ($obj->succ>=$SUCCESS) {
$obj->quit_timer($obj->quit_timer - 1);
system("touch $ENV{LOG_DIR}/SUCC");
}
if ($obj->quit_timer<=0) {
$quits--;
# one time user notification
if ($obj->quit_timer==0) {
printw("SUCC: group ".$obj->sim_group." successfully detected failure for a while.\n");
}
}
}
}
if ($quits<=0) {
printw "SUCC: Every instance has detected failure, next run...\n";
$repeat=0;
}
sleep 1;
}
# get network summary
# Reads each non-sink node's fault_inject device and parses four numbers
# from each of its first two lines that start the match at "Sympathy".
foreach my $obj (@INSTANCES) {
$ENV{SIM_GROUP}=$obj->sim_group;
foreach my $i (1..$NODES) {
if ($i==int($SINKNODE) || $NODESKIP[$i]) {next;}
my $node=genNode($i);
my @summary=`cat /dev/sim/group$ENV{SIM_GROUP}/$node/sympathy/fault_inject`;
my($a,$b,$c,$d);
if ($summary[0]=~/Sympathy\D+(\d+)\D+(\d+)\D+(\d+)\D+(\d+)/) {
$a=$1; $b=$2; $c=$3; $d=$4;
} else {
printw "Error, can't get packet statistics from $ENV{SIM_GROUP} $node: '$summary[0]'\n";
}
if ($summary[1]=~/Sympathy\D+(\d+)\D+(\d+)\D+(\d+)\D+(\d+)/) {
printw "Packet ($ENV{SIM_GROUP} $node send/recv) Sympathy:$a/${b}B $1/$2# Other:$c/${d}B $3/$4#\n";
}
}
}
killProcesses();
if ($BZIP2) {
# compresses matching logs of every group directory under $SYMPATHY_DIR,
# not only those of the current run
system("bzip2 $SYMPATHY_DIR/group*/log? $SYMPATHY_DIR/group*/log?? $SYMPATHY_DIR/group*/emrun.log");
}
#if ($iter!=$ITERATIONS) {
# killProcesses() escalates to kill -9 after 10 seconds; wait that out
printw "Waiting for processes to really end...";
sleep 12;
printw "done\n";
#}
}
printw "Sars all done, exiting.\n";
exit 0;
| CENS CVS Mailing List |
Powered by ViewCVS 0.9.2 |