(file) Return to sars.pl CVS log (file) Jump to this file's LXR Page (dir) Up to [CENS] / emstar / devel / sympathy_devel

File: [CENS] / emstar / devel / sympathy_devel / sars.pl (download) / (as text)
Revision: 1.34, Thu Jul 20 23:26:09 2006 UTC (3 years, 4 months ago) by nithya
Branch: MAIN
CVS Tags: pregeonet, PRE_TOSNIC_FIX, PRE_64BIT, HEAD, CYCLOPS_RELEASE_CANDIDATE_2_0, CYCLOPS_PRERELEASE_STABLE, CENTROUTE_EMSTAR_SOCKETS, AMARSS_JR_DEPLOYMENT_6_05_07
Changes since 1.33: +8 -1 lines
*** empty log message ***

#!/usr/bin/perl
#
# sars - Sympathy Automated Regression System

use strict;

# Global settings; edit these to match your own environment.
my %G;    # run-time option flags (e.g. $G{verbose})

# Threshold for what is counted as a "success":
#   3 = correct root-cause and localization,
#   2 = correct root-cause, but not necessarily localization,
#   1 = some root-cause reported at the node.
my $SUCCESS = 3;

my $epoch             = 3;                          # metrics periods per failure epoch
my $METRICS_PERIOD    = 30;                         # seconds; re-read from Sympathy.h later
my $TRACK_FAIL_PERIOD = $METRICS_PERIOD * $epoch;
my $INITIAL_DELAY     = 15 * 60;                    # seconds added to random fault times

my $EMSTAR_HOME = "$ENV{HOME}/cvs/emstar_test/emstar";

my $JITTER_SCRIPT = "$ENV{HOME}/bin/inject_jitter";
my ($SIM_FILE, $NODES, @NODESKIP);
$NODES = 30;

# The jitter script must exist even when -jitter is never requested.
unless (-e $JITTER_SCRIPT) {
    print "Script to inject jitter ($JITTER_SCRIPT) not found\n";
    exit 1;
}

#$SIM_FILE="./sympathy_sim.sim";
$SIM_FILE = "../devel/sympathy_devel/testtabs/sympathy_sim.sim";

# Directory where Sympathy regression files are located.
my $SYMPATHY_DIR = "$ENV{HOME}/.sympathy_3";

# Replaces $SIM_FILE when -ceiling is given. #NR
my $SIM_CEILING_FILE = " --ceiling ../devel/sympathy_devel/sympathy_ceiling.sim";

# The heading '00' in '002' is VERY IMPORTANT.
my $SINKNODE = '002';

my $ITERATIONS      = 1;
my $SIMULATION      = 1;
my $QUIT_AFTER_SUCC = 1;    # quit once a fault/failure has been found successfully

# Unbuffered STDOUT.
select(STDOUT);
$| = 1;
use POSIX;
use Fcntl;
use FileHandle;

# ----------------- define the instance object --------------------
# Instance: a hash-based record describing one running simulation group.
# Every field is read with $obj->field and written with $obj->field($value);
# both forms are served by AUTOLOAD below.
package Instance;
our $AUTOLOAD;

# Default field values; each new object starts with a copy of these.
my %vars = (
    log_dir          => undef,
    sim_group        => 'GROUP_UNDEF',
    iter             => 1,

    summ_fail_handle => '',
    #sigsegv_handle => '',

    s_iter           => 1,        # current iteration
    s_node           => undef,

    faults           => 0,
    failures         => 0,
    count            => 0,        # status
    previter         => -1,

    succ             => -1,       # highest success status
    quit_timer       => 30,
);

# Constructor; may be invoked on the class name or on an existing object.
sub new {
    my $invocant = shift;
    my $class    = ref($invocant) || $invocant;
    return bless { %vars }, $class;
}

# Field names currently stored in the object.
sub keys {
    my ($self) = @_;
    return CORE::keys %{$self};
}

# Generic accessor.
#   $obj->name($value)  stores $value under 'name' (creating the field if
#                       needed) and returns it
#   $obj->name          returns the stored value
# DESTROY is answered with undef; any other unknown field name prints an
# error and terminates the program with status 1.
sub AUTOLOAD {
    my ($self, @args) = @_;
    ref($self) || die "$self is not an object";

    my $field = $AUTOLOAD;
    $field =~ s/.*://;    # strip the package qualifier

    return $self->{$field} = $args[0] if @args;
    return $self->{$field} if exists $self->{$field};
    return undef if $field eq 'DESTROY';

    print "ERROR: entry object->$field() not found\n";
    exit 1;
    #return undef;
}


# -----------------------------------------------------------------
package main;

my @INSTANCES;         # Instance objects of the running iteration
my @FILEHANDLES;       # handles opened by openAsynchCommand
my %BUFFER;            # text read from each handle and not yet consumed

my $INSTANCES=1;       # default num of instances run
my %MESSAGES;          # mapping between STF_OK and "OK"
my %NODE;              # success/fail info (SRC_NO_DATA, SRC_NODE_FAILED, etc)
my %HANDLE2FILE;       # associate handle with command
my $LOG_SUFFIX='';     # extra information
my $EXIT_TIME=60*60*2; # kill EVERYTHING in 2 hours for safety
my $STDOUT_NO_CR=0;    # for pretty print purpose
my $BZIP2=0;           # set by -bzip2: compress logs after each iteration

# exception handling
# SIGSTOP cannot be caught, so the former $SIG{STOP} entry never took effect.
# SIGTERM is trapped instead, so that a plain `kill` of this script also
# cleans up the child processes.
$SIG{INT}=\&handler;
$SIG{HUP}=\&handler;
$SIG{TERM}=\&handler;
my @PID;               # pids of launched children and of their kill timers
# Terminate every process recorded in @PID and forget all per-iteration
# state (pids, file handles, instances, read buffers, handle->command map).
sub killProcesses {
    if (@PID) {
        my $pids = join(' ', @PID);
        printw("Cleaning processes ", join(', ', @PID), "...");
        # Shell out instead of using Perl's kill because Perl's kill hangs.
        # First a polite kill, then a kill -9 ten seconds later, both detached.
        system("(kill $pids > /dev/null 2>&1) &");
        system("(sleep 10; kill -9 $pids > /dev/null 2>&1) &");
        printw("done\n");
    }
    #foreach my $handle (@FILEHANDLES) {close($handle);}
    %HANDLE2FILE = ();
    %BUFFER      = ();
    @INSTANCES   = ();
    @FILEHANDLES = ();
    @PID         = ();
}
# Signal handler: clean up the children, force-kill them, and exit with
# status 1.
sub handler {
    # Remember the pids first: killProcesses() empties @PID.
    my @victims = @PID;
    killProcesses();
    my $pids = join(' ', @victims);
    system("(kill -9 $pids > /dev/null 2>&1) &");
    exit 1;
}

# True (1) when the buffer kept for $handle contains at least one newline,
# false otherwise.
sub hasLine {
    my $handle = shift;
    return $BUFFER{$handle} =~ /\n/ ? 1 : '';
}
# Remove and return the first non-empty line (including its newline) from
# the buffer kept for $handle. Leading empty lines are discarded. Returns a
# false value when no complete line is buffered.
sub getLine {
    my $handle = shift;

    # Get rid of new-lines at the front of the buffer.
    $BUFFER{$handle} =~ s/^\n+//;

    # Cut the first complete line off the buffer and hand it back.
    if ($BUFFER{$handle} =~ s/^([^\n]+\n)//) {
        return $1;
    }
    return '';
}

# Pull whatever is currently readable from every handle in @FILEHANDLES and
# append it to that handle's entry in %BUFFER. At most 100 reads of 1024
# bytes are attempted per handle per call.
sub processFileHandles {
    #fcntl($handle, F_SETFL(), O_NONBLOCK());
    my $chunk;
    for my $fh (@FILEHANDLES) {
        my $got    = 1024;
        my $budget = 100;    # safety for switching tasks
        while ($got && $budget-- > 0) {
            # IMPORTANT, below is a non-blocking call
            $got = sysread($fh, $chunk, 1024);

            if (!defined($got)) {
                # Any error other than EAGAIN ends reading from this handle.
                # On EAGAIN nothing is available right now; $got is undef, so
                # the loop condition ends the loop as well.
                last if $! != EAGAIN();
                #print "Stat:$stat\n";
                next;
            }

            if ($got == 0) {
                # Remote socket closed connection
                warn "Handle closed!\n";
                close($fh);
                last;
            }

            $BUFFER{$fh} .= $chunk;
        }
    }
}

# Start $command through the shell with stderr merged into stdout, and
# return a handle on its output that has been switched to non-blocking
# mode. The handle is recorded in @FILEHANDLES and %HANDLE2FILE.
# Dies when the command cannot be started or the flags cannot be set.
sub openAsynchCommand {
    my $command = shift;

    my $handle = FileHandle->new;
    $HANDLE2FILE{$handle} = $command;
    push(@FILEHANDLES, $handle);

    #open($handle, "$ENV{HOME}/bin/loop 2>&1 |");
    open($handle, "$command 2>&1 |") || die "Can't execute $command";

    my $flags = O_NONBLOCK | O_RDWR | O_NDELAY;
    fcntl($handle, F_SETFL, $flags)
        or die "Couldn't set flags for HANDLE: $!\n";

    return $handle;
}

# print() wrapper. In verbose mode, when progress dots were printed without
# a trailing newline ($STDOUT_NO_CR set), a newline is emitted first so the
# message starts on a fresh line.
sub printw {
    if ($STDOUT_NO_CR && $G{verbose}) {
        print "\n";
        $STDOUT_NO_CR = 0;
    }
    print @_;
}

# Append one line to $ENV{LOG_DIR}/sars.log and echo it to the terminal.
# We open and then close the file on every call to make sure all the logs
# are flushed. Slightly inefficient but simple.
#   $obj  - Instance the line belongs to (currently unused)
#   $line - text to log; trailing whitespace is stripped
# Dies when the log file cannot be opened; a failing close only warns.
sub logw {
    my($obj, $line)=@_;
    $line=~s/\s+$//; # strip trailing space

    my $file="$ENV{LOG_DIR}/sars.log";
    # Lexical handle + three-argument open: the file name can never be
    # misread as an open mode, and the handle cannot clash with other code.
    open(my $wr, '>>', $file) or die "Can't write to $file: $!";
    print {$wr} $line."\n";
    # Buffered write errors (e.g. disk full) only surface at close.
    close($wr) or warn "Can't close $file: $!\n";

    printw("> $line\n");
}

# Consume every complete line buffered for this instance's summary reader
# and log each reported failure through logw().
#   $obj        - Instance whose summ_fail_handle buffer is parsed
#   $timepassed - elapsed-time value written to the screen and the log
# Returns 1 on every visible path (also when no handle has been opened yet).
sub processSummFail {
    my($obj, $timepassed)=@_;
    my $handle=$obj->summ_fail_handle;
    my($s_node, $s_iter);

    # Nothing to parse until the 'monitor' command has opened the handle.
    if (!$handle) {return 1}
    while (hasLine($handle)) {
        my $line=getLine($handle);
        if ($line=~/\*\*\*\*\*\*\*\*/) {
            # separator line of asterisks: ignore
            next;
        } elsif ($line=~/Node\s+(\d+).*Metric Pd:\s+(\d+)/) {
            # Header line: remember which node and metric period the
            # following status lines refer to.
            $s_node=$1; $s_iter=$2;
            if ($s_iter != $obj->s_iter) {
                # A new metric period started: reset the per-period counters.
                $obj->s_iter($s_iter);
                $obj->count(0); 
                $obj->failures(0);
                $obj->faults(0);
                if ($G{verbose} && $s_iter!=$obj->previter) {
                    printw("Group:".$obj->sim_group."  time:$timepassed  sink iteration:$s_iter\n");
                    $obj->previter($s_iter);
                }
            }
            $obj->s_node($s_node);
            $obj->count($obj->count+1); # see if we're done with messages
        } else {
	   # Any other line is treated as a comma-separated list of statuses.
	   foreach my $status (split(/,/,$line)) {
                $status=~s/^\s+//;
                $status=~s/\s+$//;
                # $1 = text before "Failure" (category), $2 = "Failure",
                # $3 = root-cause text up to the last "(" on the line.
                if ($status=~/^(.*)\s*(Failure).*Root-Cause: (.+)\(/) {
                    my $cat=$1;
                    my $ff=$2;
                    my $msg=$3; 
		    $msg=~s/\s+$//; 
		    print "now CHECK msg now: $msg\n";
		    # Translate the human-readable text into its identifier;
		    # text missing from %MESSAGES leaves $msg undefined.
		    $msg=$MESSAGES{$msg};
		    print "2 now CHECK msg now: $msg\n";
                    my $node=$obj->s_node;
		    print "CHECK node: $node\n";
		    print "CHECK line: $status\ncomp of failure is: $cat, failure: $msg\n";
		    print "CHECK cat: $cat\n";
		    my $mp = $NODE{$node."_$msg"};
			print "CHECK node_msg: $mp\n";
                    if (!($msg eq 'SRC_OK')) {
                        $obj->failures($obj->failures+1);

                        # Score the report against the expectations in %NODE:
                        #   expected fault on this node -> '3' if the category
                        #       mentions "Root", otherwise '2'
                        #   only the node is expected   -> '1'
                        #   nothing expected            -> '0'
                        my $success=0;
                        my $success_code = '2';
	                if ($cat=~/Root/) { $success_code = '3'; }
			#                 if ($cat=~/Node/i) {
                            $success=($NODE{$node."_$msg"}?$success_code:
                                      $NODE{$node}?'1':'0');
		        #}

                        # FALSE report! Wrong node!
                        if ($success==0 && $msg=~/NODE_FAILED/) {$success=-1}
                        
                        # record the highest succ level
                        if ($obj->succ < $success) {$obj->succ($success);}

                        #iter time s_iter node category type correctness
                        logw($obj,
                             sprintf("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s",
                                     $obj->iter, $timepassed, 
                                     $obj->s_iter, $node, 
                                     $cat, $ff, 
                                     $msg,
                                     $success));
                    }
                }
	    }
	}
    }
    return 1;
}

# Build the device-directory name of a node: "node" followed by the node
# number zero-padded to at least three digits (2 -> "node002").
sub genNode {
    my $number = shift;
    my $padded = sprintf("%.3d", $number);
    return "node" . $padded;
}

# Run $cmd in the background with its output redirected into $ENV{LOG_DIR}.
#   $cmd     - shell command to start
#   $logfile - log file name; when empty, a hidden file named "._" plus the
#              text after the last "/" of $cmd is used
# The pid reported by the shell is appended to @PID and returned.
sub launch {
    my ($cmd, $logfile) = @_;

    my $out = $logfile;
    unless ($out) {
        my @parts = split(/\//, $cmd);
        $out = '._' . $parts[$#parts];    # make it hidden
    }
    $cmd .= " > $ENV{LOG_DIR}/$out";

    # EXECUTE below: background the command and have the shell echo its pid.
    my $pid = `$cmd 2>&1 & wmpid=\$!; echo \$wmpid`;
    chomp($pid);

    push(@PID, $pid);

    printw("> Launching command: $cmd ($pid)\n");
    return $pid;
}

# Start the background loggers for the sink node and schedule a forced kill
# of all of them $EXIT_TIME+5 seconds from now.
#
# The sink is derived from $SINKNODE throughout; it used to be hard-coded
# as "node002" / "-N 2" / "log2". For the default $SINKNODE of '002' the
# commands and log file names are unchanged.
# NOTE(review): "-N" is presumably linkdump's node-id option - confirm.
sub log_script {
    my $sinknode=genNode($SINKNODE);   # e.g. "node002"
    my $sinkid=int($SINKNODE);         # e.g. 2
    my $simdev="/dev/sim/group$ENV{SIM_GROUP}/$sinknode";
    my $linkdump="$EMSTAR_HOME/obj.i686-linux/bin/linkdump";
    my $echocat="$EMSTAR_HOME/obj.i686-linux/bin/echocat";

    # Each launch() returns the pid of the logger it started.
    my @pids=(
        launch("cat $simdev/emlog/all/all-f", "log$sinkid"),
        launch("cat $simdev/tos/logs/error-f", "log_error$sinkid"),
        launch("$linkdump -f -l -N $sinkid -G $ENV{SIM_GROUP} -r -U mote0 -T", "link$sinkid"),
        launch("$echocat -w /dev/$sinknode/sympathy/metrics", "all_metrics"),
        launch("$echocat -w /dev/emrun/last_msg", "last_message"),
        launch("$echocat -w /dev/$sinknode/sympathy/summary", "all_summ_fail"),
    );
 #foreach my $i (1..$NODES) {
 #       if ($i==int($SINKNODE) || $NODESKIP[$i]) {next;}
 #       my $node=genNode($i);
 #       push(@pids, launch("cat /dev/sim/group$ENV{SIM_GROUP}/$node/tos/logs/usr3,usr1-f", "log$i"));
 #   }

    my $pids=join(' ', @pids);
    my $time=$EXIT_TIME+5;

    # Detached watchdog that kills the loggers once the run must be over.
    # Its own pid is recorded so that killProcesses() can cancel it.
    my $killpid=`(sleep $time; kill -9 $pids) > /dev/null 2>&1 & wmpid=\$!; echo \$wmpid`; chomp($killpid);
    push(@PID, $killpid);
    printw("After $time seconds, kill $pids\n");
}

# Central place to put in actions
#
# Execute one queued command for one simulation instance.
#   $obj        - Instance the command applies to
#   $cmdline    - queue entry of the form "<time> <command> [<argument>]"
#   $timepassed - elapsed time, used for logging only
# Returns the string 'exit' for the exit command. Every other command
# returns a value that is never equal to 'exit'.
sub processCommand {
    my($obj, $cmdline, $timepassed)=@_;
    my($time,$cmd,$action)=split(/\s+/, $cmdline);
    $ENV{SIM_GROUP}=$obj->sim_group;
    my $simdev="/dev/sim/group$ENV{SIM_GROUP}";

    #NR Log injection of failure
    logw($obj,
           sprintf("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s",
           $obj->iter, $timepassed,
           $obj->s_iter, $action,
           "Inject", "Command", "$cmd".($action?",$action":""), '0'));

    if ($G{verbose}) {printw("*Time:$time executing:$cmd $action (group $ENV{SIM_GROUP})\n");}
    if ($cmd eq 'exit') {
        return 'exit';
    }

    if ($cmd eq 'reboot' || $cmd eq 'die') {
        # write the command word into /dev/.../fault_inject;
        # $action is the node number here
        my $target="$simdev/".genNode($action)."/sympathy/fault_inject";
        system("echo $cmd > $target");
        if ($?) {printw("Problem injecting '$cmd' into $target\n");}

    } elsif ($cmd eq 'traffic') {
        my $period=$action*10;   # Convert sec to 100ms units
        my $shell="echo 'id=1:sensors=33:period=$period' > $simdev/".genNode($SINKNODE)."/dse/query";
        printw("Injecting traffic with command:\n$shell\n");
        system($shell);
        if ($?) {printw("Problem injecting 'traffic' \n");}

    } elsif ($cmd eq 'jitter') {
        my $shell="$JITTER_SCRIPT";
        printw("Injecting jitter with command:\n$shell\n");
        system($shell);
        if ($?) {printw("Problem injecting 'jitter' \n");}

    } elsif ($cmd eq 'halt') {
        my $node=genNode($action);
        print "GOING TO HALT NODE!\n";
        system("echo halt > $simdev/$node/emrun/command");
        if ($?) {printw("Problem injecting 'halt' into $node\n");}

    } elsif ($cmd eq 'send' || $cmd eq 'recv') {
        # $action is "<node>,<prob>". Format the node number itself rather
        # than the whole string, which only worked because the numeric
        # conversion stopped at the comma.
        my($nodenum,$prob)=split(/,/, $action);
        my $node=genNode($nodenum);
        my $target="$simdev/$node/sympathy/fault_inject";
        system("echo $cmd:node=$node:prob=$prob > $target");
        if ($?) {printw("Problem injecting '$cmd' into $target\n");}

    } elsif ($cmd eq 'monitor') {
        # start reading summ_fail
        if ($obj->summ_fail_handle) {
            warn "Already parsing summ_fail! Command ignored\n";
            return;
        }
        my $sinknode=genNode($SINKNODE);

        log_script();

        my $handler = openAsynchCommand("$EMSTAR_HOME/obj.i686-linux/bin/echocat -w /dev/$sinknode/sympathy/summary");
        $obj->summ_fail_handle($handler);

        #my $log_dir=$obj->log_dir;
        #my $handler2 = openAsynchCommand("tail -f $log_dir/emrun.log | grep SIG");
        #$obj->sigsegv_handle($handler2);

        # echo non-sense into nodes (try to kick them start)
        printw("Checking if all the nodes are up and running...\n");
        foreach my $i (1..$NODES) {
            if ($i==int($SINKNODE) || $NODESKIP[$i]) {next;}
            my $node=genNode($i);
            system("echo WAKE > $simdev/$node/sympathy/fault_inject");
            if ($?) {printw("\tWARNING: Node $i is not ready\n");}
        }

    } else {
        warn "Command ($cmd, $action) not understood\n";
    }
    return;
}



# ------------------ program initialization --------------------
# Prepare the working directories and locate the EmStar tree.
system("mkdir -p /tmp/$ENV{USER}");
die "You need to setup the HOME environment\n" if !$ENV{HOME};
system("mkdir -p $SYMPATHY_DIR") if !-e $SYMPATHY_DIR;

# Fall back to $ENV{EMSTAR_HOME} when the default location does not exist.
if (!-e $EMSTAR_HOME) {
    die "Can't find $EMSTAR_HOME, please set EMSTAR_HOME\n"
        if !$ENV{EMSTAR_HOME};
    die "Can't find $ENV{EMSTAR_HOME}, please re-set EMSTAR_HOME\n"
        if !-e $ENV{EMSTAR_HOME};
    $EMSTAR_HOME = $ENV{EMSTAR_HOME};
}

# Read the metrics period (milliseconds in the header) from Sympathy.h and
# keep it in seconds.
my $line = `grep METRICS_PERIOD $EMSTAR_HOME/tos-contrib/sympathy/tos/lib/Sympathy.h`
    || die "Can't open $EMSTAR_HOME/tos-contrib/sympathy/tos/lib/Sympathy.h";

if ($line =~ /METRICS_PERIOD_MSEC\s+(\d+)/) {
    $METRICS_PERIOD = $1 / 1000;
}
else {
    die "Can't find METRICS_PERIOD in Sympathy.h";
}
$TRACK_FAIL_PERIOD = $METRICS_PERIOD * $epoch;
print "CHECK metrics-period: $METRICS_PERIOD, track-fail: $TRACK_FAIL_PERIOD\n";

# process arguments
# With no arguments at all, print the usage guide and exit.
# The option list and the examples describe what the argument parser
# accepts: fault options (-die/-halt/-reboot/-send/-recv) take only their
# target because their time is randomized, and -traffic takes <sec>:<period>.
if ($#ARGV==-1) {
    # A heredoc interpolates variables but not arithmetic, so compute the
    # default exit time up front.
    my $default_exit=$METRICS_PERIOD*10;
    printw <<EOL;
Sympathy Automated Regression System (sars) Usage Guide:
> sars.pl [options below]
  -sim_group <num>         Simulation group to use (0-99). May also be given
                           through the SIM_GROUP environment variable
  -instances <num>         Number of EmStars to run simultaneously. [default=1]
  -iter <num>              Number of times to run/generate data [default=1]
  -ceiling                 Run with ceiling cfg file:\n\t$SIM_CEILING_FILE
  -monitor <sec>           Start reading /dev/*/files after <sec>
  -die <node_num>          After random time, node <node_num> radio dies
  -halt <node_num>         After random time, node <node_num> *HALTS* completely
  -reboot <node_num>       After random time, reboot node <node_num>
  -send node=<n>,prob=<p>  After random time, inject a 'send' fault with
                           prob=<p> on node <n>
  -recv node=<n>,prob=<p>  After random time, inject a 'recv' fault with
                           prob=<p> on node <n>
  -jitter <sec1>           After <sec1>, disable jitter on nodes (to force congestion on
                           Sympathy traffic)
  -epoch <num>             #metrics-pd per failure-epoch
  -traffic <sec>:<period>  After <sec>, inject dse-traffic w/ period <period> (seconds)
  -keepgoing               If this is specified, simulation will keep running
                           until time is up. If this is not specified, then
                           simulation will quit 30 seconds after it has
                           successfully detected fault/failure.
  -exit <sec>              After <sec> exit. By default exit after $default_exit seconds
  -verbose                 Print time
  -kill                    Kill previous runs
  -bzip2                   Use bzip2 to compress logs
Example: sars.pl -sim_group 95 -jitter 150 -monitor 151 -die 7 -exit 2000 -verbose
Example: sars.pl -sim_group 95 -jitter 150 -monitor 151 -send node=3,prob=70 \\
                 -recv node=3,prob=70 -exit 2000 -verbose
EOL
    exit 0;
}
 
# parse Arguments
#
# Every option starts with '-'. Timed options are turned into entries of
# @COMMAND_QUEUE of the form "<sec> <command> [<argument>]". Fault options
# also record in %NODE which diagnosis is expected for the node, and extend
# $LOG_SUFFIX, which becomes part of the log directory name.
# The queue keeps command-line order; the run loop only ever inspects its
# first entry, so options should be given in increasing time order.
my @COMMAND_QUEUE;
my $_prevsec=0;
for (my $i=0; $i<=$#ARGV; $i++) {
    my $arg=$ARGV[$i];
    if ($arg!~s/^\-//) {die "Argument $arg not recognized";}

    # Fetch the value that follows the current option; a missing value is
    # reported instead of being silently read as undef.
    my $value_of=sub {
        my($opt)=@_;
        if ($i>=$#ARGV) {die "Option -$opt requires a value\n";}
        return $ARGV[++$i];
    };

    if ($arg eq 'kill') {
        system("ps auxwww|grep $ENV{USER}|egrep '(emrun|emsim|sympathy_sink|sympathy_app|wmpid)' | awk '{print \$2}'|xargs kill -9");
        printw("All Sympathy processes should be killed\n");
        exit 0;
    } elsif ($arg eq 'bzip2') {
        $BZIP2=1;
    } elsif ($arg eq 'sim_group') {
        my $sim_group=$value_of->($arg);
        if ($sim_group!~/^\d+$/) {die "SIM_GROUP must be between 0-99";}
        $ENV{SIM_GROUP}=$sim_group;
    } elsif ($arg eq 'iter') {
        $ITERATIONS=$value_of->($arg);
        if ($ITERATIONS!~/^\d+$/) {die "-iter expects a number, got '$ITERATIONS'\n";}
    } elsif ($arg eq 'instances') {
        $INSTANCES=$value_of->($arg);
        if ($INSTANCES!~/^\d+$/) {die "-instances expects a number, got '$INSTANCES'\n";}
    } elsif ($arg eq 'keepgoing') {
        $QUIT_AFTER_SUCC=0;
    } elsif ($arg eq 'epoch') {
        $epoch=$value_of->($arg);
        # $epoch is later appended to the emsim command line
        if ($epoch!~/^\d+$/) {die "-epoch expects a number, got '$epoch'\n";}
        #Re-calculate the track-fail-period!
        $TRACK_FAIL_PERIOD = $METRICS_PERIOD * $epoch;
    } elsif ($arg eq 'ceiling') {
        $SIM_FILE = $SIM_CEILING_FILE;
        $SIMULATION = 0;
        printw("New sim-file: $SIM_FILE\n");
    } elsif ($arg =~ /^(?:jitter|traffic|monitor|die|halt|recv|send|reboot|exit)$/) {
        my $cmd=$arg;
        my @fields=split(':', $value_of->($cmd));
        my($sec,$param)=@fields;

        if ($cmd =~ /^(?:die|halt|recv|send|reboot)$/) {
            #Randomize time failure is injected!
            # The fault target is the LAST ':'-separated field, so both
            # "-die 7" and "-die 500:7" address node 7. A leading time is
            # ignored; it used to be mistaken for the node number.
            $param = $fields[-1];

            # rand within stats period cuz we check every stats pd for failure
            $sec = int(rand($METRICS_PERIOD));

            #This is done to get rid of fact that sympathy waits for
            # epoch*stats-pd in the beginning. And that the network
            # takes some time to form.
            print "sec was $sec\n";
            #$sec += (3 * $TRACK_FAIL_PERIOD);
            $sec += $INITIAL_DELAY;
            print "sec is $sec\n";
        } elsif (!defined($sec) || $sec!~/^\d+(?:\.\d+)?$/) {
            printw("$cmd: time ($sec) must be a number of seconds\n");
            exit 1;
        }

        $_prevsec = $sec;

        if ($cmd eq 'monitor') {
            push(@COMMAND_QUEUE, "$sec $cmd");
        } elsif ($cmd eq 'exit') {
            push(@COMMAND_QUEUE, "$sec $cmd");
            $EXIT_TIME=$sec;
        } elsif ($cmd eq 'traffic') {
            # param is the period
            if ($param!~/^\d+$/) {
                printw("$cmd: Second argument period ($param) must be a number\n");
                exit 1;
            }
            push(@COMMAND_QUEUE, "$sec $cmd $param");
            $LOG_SUFFIX.="$sec$cmd$param.";
        } elsif ($cmd eq 'jitter') {
            push(@COMMAND_QUEUE, "$sec $cmd");
            $LOG_SUFFIX.="$sec$cmd.";
        } elsif ($cmd eq 'die' || $cmd eq 'halt') {
            # param is node number
            if ($param!~/^\d+$/) {
                printw("Second argument node ($param) must be a number\n");
                exit 1;
            }
            push(@COMMAND_QUEUE, "$sec $cmd $param");
            $NODE{$param."_SRC_NODE_FAILED"}=1;
            $NODE{$param}=1;
            my $tmp = $NODE{$param."_SRC_NODE_FAILED"};
            print "for arg: $param, msg: _SRC_NODE_FAILED: $tmp\n";
            $LOG_SUFFIX.="$sec$cmd$param.";
        } elsif ($cmd eq 'send' || $cmd eq 'recv') {
            my($node,$prob);
            if ($param=~/node=(\d+),prob=(\d+)/i) {
                $node=$1;
                $prob=$2;
                push(@COMMAND_QUEUE, "$sec $cmd $node,$prob");

            } else {
                die "Sorry, send/recv accepts in the format of -send node=5,prob=80";
            }
            $NODE{$node."_SRC_INSUFFICIENT_DATA"}=1;
            $NODE{$node."_SRC_NODE_FAILED"}=1;
            # look the entry up under the node number it was stored with
            my $tmp = $NODE{$node."_SRC_NODE_FAILED"};
            print "for arg: $param, msg: _SRC_NODE_FAILED: $tmp\n";
            $NODE{$node}=1;
            $LOG_SUFFIX.="$sec$cmd$param.";

        } elsif ($cmd eq 'reboot') {
            # param is node number
            if ($param!~/^\d+$/) {
                printw("Second argument node ($param) must be a number\n");
                exit 1;
            }
            push(@COMMAND_QUEUE, "$sec $cmd $param");
            # setup the node failure
            $NODE{$param."_SRC_NODE_REBOOTED"}=1;
            $NODE{$param}=1;
            $LOG_SUFFIX.="$sec$cmd$param.";
        }
    } elsif ($arg eq 'verbose') {
        $G{verbose}=1;
    } else {
        warn "Argument '$arg' not understood.";
        exit 1;
    }
}

# The epoch is passed to emsim after the sim file name and is also made
# part of the log directory name.
$SIM_FILE.=" $epoch";
$LOG_SUFFIX.="epoch$epoch.";

# put in an exit command if the last one isn't already exit
if (!@COMMAND_QUEUE || $COMMAND_QUEUE[-1] !~ /exit/) {
    push(@COMMAND_QUEUE, ($METRICS_PERIOD*10)." exit");
    $EXIT_TIME=($METRICS_PERIOD*10);
}
if ($EXIT_TIME==0) {die "Exit time not specified!\n";}
# Group 0 is a legal group according to the messages ("0 to 99"), so test
# for a missing value instead of a false one.
if (!defined($ENV{SIM_GROUP}) || $ENV{SIM_GROUP} eq '') {
    die "You need to set SIM_GROUP (or specify -sim_group between 0 to 99)";
}


# Show the user what is about to run and which diagnoses are expected.
printw(("="x75)."\n");
printw("Command review:\n");
foreach my $cmd (@COMMAND_QUEUE) {
    printw("\t$cmd\n");
}
foreach my $node (sort (keys %NODE)) {
    # Keys are either "<node>_<FAULT>" or a bare "<node>".
    if ($node =~ s/^(\d+)_//) {
        printw("Expecting fault $node for node $1\n");
    } else {
        printw("Expecting 'something' for node $node\n");
    }
}


# Get a text to ID translation
# Fill %MESSAGES (human-readable text => identifier) from the lines of
# sympathy_decode.c that look like `<SR?_NAME>) return "<text>"`.
$MESSAGES{"Unrecognized Fault"}="UNRECOGNIZED";
{
    my $decode_src="$EMSTAR_HOME/devel/sympathy/libsympathy/sympathy_decode.c";
    # Lexical handle + three-argument open; the handle is closed again
    # instead of staying open for the rest of the run.
    open(my $decode_fh, '<', $decode_src) or
        die "Can't find $decode_src: $!";
    while(my $line=<$decode_fh>) {
        if ($line=~/(SR._\w+)\s*\)\s+return\s+\"([^\"]+)\"/) {
            $MESSAGES{$2}=$1;
        }
    }
    close($decode_fh);
}

# Start running the test here!
# Each iteration: start one emsim per simulation group, run the queued
# commands as their times are reached while parsing the summary output,
# print packet statistics, then kill every child process.
my @_CMD=@COMMAND_QUEUE;   # pristine copy; the queue is consumed every iteration
my($_from, $_to)=($ENV{SIM_GROUP}, $ENV{SIM_GROUP}+$INSTANCES-1);
foreach my $iter (1..$ITERATIONS) {
    @COMMAND_QUEUE=@_CMD;
    printw "\n";
    printw "Running iteration $iter\n";

    # execute all the emrun command/instances asynchronously
    foreach my $group ($_from..$_to) {
        $ENV{SIM_GROUP}=$group;
        my $type=$SIMULATION?'sim':'emu';
        # NOTE: $suffix is computed but not used below; the directory name
        # is built from $LOG_SUFFIX directly.
        my $suffix=($LOG_SUFFIX?$LOG_SUFFIX.'.':'');
        # Pick the first run letter a..z whose log directory does not exist
        # yet. If all 26 exist, LOG_DIR stays at the 'z' directory.
        foreach my $count ('a'..'z') {
            if (!-e ($ENV{LOG_DIR}="$SYMPATHY_DIR/group$ENV{SIM_GROUP}.${LOG_SUFFIX}iter$iter$count.$type")) {
                last;
            }
        }
        if (-e $ENV{LOG_DIR}) {
            print "WARNING: Directory $ENV{LOG_DIR} already exists! Over-writing...\n";
        }
        system("mkdir -p $ENV{LOG_DIR}");
        unlink("$ENV{LOG_DIR}/sars.log");
        
        printw "Log dir: $ENV{LOG_DIR}\n";
        my $origdir=`pwd`; chomp($origdir);
        # emsim is looked up relative to the current working directory.
        if (!-e "./emrun/emsim") {die "Can't find ./emrun/emsim";}
	my $empid=launch("./emrun/emsim $SIM_FILE", "emrun.log");
        # Watchdog: force-kill emsim 5 seconds after the planned exit time.
        my $time=$EXIT_TIME+5;
        my $pid2=`(sleep $time; kill -9 $empid) > /dev/null 2>&1 & wmpid=\$!; echo \$wmpid`; chomp($pid2);
        push(@PID, $empid, $pid2);

        # Record the bookkeeping object for this group.
        my $obj = Instance->new();
        $obj->log_dir($ENV{LOG_DIR});
        $obj->sim_group($group);
        $obj->iter($iter);
        push(@INSTANCES, $obj);

        logw($obj, sprintf("#h Run-Iter Timepassed TestIter Node-id Component Type Failure Correct"));
        logw($obj, "# ($ENV{LOG_DIR}) ".`date`);
    }

    # loop here
    # One pass per second until an exit command runs, a fatal error is
    # reported, or every instance has finished its success countdown.
    my $starttime=time;
    my $repeat=1;
    while ($repeat) {
        processFileHandles();
        my $timepassed=time-$starttime;

        if ($G{verbose}) {
            # progress display: a dot per pass, plus the minute count when
            # the elapsed time is a multiple of 60 seconds
            if ($timepassed%60==0) {print(sprintf("%d min",int($timepassed/60)));}
            $STDOUT_NO_CR=1;
            print ".";
        }

        # process commands after time has passed
        # Only the head of the queue is examined, so commands run in queue
        # order; each due command is applied to every instance.
        while ($#COMMAND_QUEUE!=-1 &&
               (split(/\s+/, $COMMAND_QUEUE[0]))[0] <= $timepassed) {
            my $cmd=shift(@COMMAND_QUEUE);
            foreach my $obj (@INSTANCES) {
                $ENV{LOG_DIR}=$obj->log_dir;
                $ENV{SIM_GROUP}=$obj->sim_group;
                if (processCommand($obj, $cmd, $timepassed) eq 'exit') {
                    $repeat=0;
                }
            }
        }

        # parse the lines
        # $quits counts the instances that have NOT yet finished their
        # success countdown.
        my $quits=$#INSTANCES+1;
        foreach my $obj (@INSTANCES) {
            $ENV{LOG_DIR}=$obj->log_dir;
            $ENV{SIM_GROUP}=$obj->sim_group;
            if (!processSummFail($obj, $timepassed)) {
                printw "FATAL ERROR DETECTED, resetting...\n";
                $repeat=0; 
                last;
            }
            if ($QUIT_AFTER_SUCC) {
                # Once the required success level is reached, quit_timer is
                # decremented on every pass and a SUCC marker file is touched.
                if ($obj->succ>=$SUCCESS) {
                    $obj->quit_timer($obj->quit_timer - 1);
                    system("touch $ENV{LOG_DIR}/SUCC");
                }
                if ($obj->quit_timer<=0) {
                    $quits--;
                    # one time user notification
                    if ($obj->quit_timer==0) {
                        printw("SUCC: group ".$obj->sim_group." successfully detected failure for a while.\n");
                    }
                }
            }
        }
        if ($quits<=0) {
            printw "SUCC: Every instance has detected failure, next run...\n";
            $repeat=0;
        }
        sleep 1;
    }
    # get network summary
    # Read each non-sink node's fault_inject file and print the counters
    # found on its first two lines.
    foreach my $obj (@INSTANCES) {
        $ENV{SIM_GROUP}=$obj->sim_group;
        foreach my $i (1..$NODES) {
            if ($i==int($SINKNODE) || $NODESKIP[$i]) {next;}
            my $node=genNode($i);
            my @summary=`cat /dev/sim/group$ENV{SIM_GROUP}/$node/sympathy/fault_inject`;
            my($a,$b,$c,$d);
            # first line: four numbers, printed below with a "B" suffix
            if ($summary[0]=~/Sympathy\D+(\d+)\D+(\d+)\D+(\d+)\D+(\d+)/) {
                $a=$1; $b=$2; $c=$3; $d=$4;
            } else {
                printw "Error, can't get packet statistics from $ENV{SIM_GROUP} $node: '$summary[0]'\n";
            }
            # second line: four numbers, printed below with a "#" suffix
            if ($summary[1]=~/Sympathy\D+(\d+)\D+(\d+)\D+(\d+)\D+(\d+)/) {
                printw "Packet ($ENV{SIM_GROUP} $node send/recv) Sympathy:$a/${b}B $1/$2#  Other:$c/${d}B $3/$4#\n";
            }
        }
    }


    # Stop all children; this also clears @INSTANCES for the next iteration.
    killProcesses();

    if ($BZIP2) {
        # Compresses matching logs of ALL group directories, not only those
        # of this iteration.
        system("bzip2 $SYMPATHY_DIR/group*/log? $SYMPATHY_DIR/group*/log?? $SYMPATHY_DIR/group*/emrun.log");
        
    } 
    #if ($iter!=$ITERATIONS) {
    # killProcesses() sends its kill -9 ten seconds after the first kill,
    # so wait slightly longer than that.
    printw "Waiting for processes to really end...";
    sleep 12;
    printw "done\n";
    #}
}
printw "Sars all done, exiting.\n";
exit 0;

CENS CVS Mailing List
Powered by
ViewCVS 0.9.2