~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

Linux Cross Reference
cvs/emstar/devel/sympathy_devel/sars.pl


  1 #!/usr/bin/perl
  2 #
  3 # sars - Sympathy Automated Regression System
  4 
  5 use strict;
  6 
  # Global settings -- change these to accommodate your own environment.
my %G;    # run-time flags; this file only sets and reads $G{verbose}

# Default variable settings.

# What is considered a "success":
#   3 = a correct root-cause and localization,
#   2 = a correct root-cause, but not necessarily localization,
#   1 = some root-cause reported at the node.
my $SUCCESS = 3;

my $epoch             = 3;
my $METRICS_PERIOD    = 30;
my $TRACK_FAIL_PERIOD = $epoch * $METRICS_PERIOD;
my $INITIAL_DELAY     = 60 * 15;   # added to every randomized fault time

my $EMSTAR_HOME = "$ENV{HOME}/cvs/emstar_test/emstar";

my $NODES = 30;
my @NODESKIP;                      # $NODESKIP[$i] true => node $i is skipped
#my $SIM_FILE = "./sympathy_sim.sim";
my $SIM_FILE = "../devel/sympathy_devel/testtabs/sympathy_sim.sim";

# The jitter injection helper must exist before anything else is attempted.
my $JITTER_SCRIPT = "$ENV{HOME}/bin/inject_jitter";
unless (-e $JITTER_SCRIPT) {
    print "Script to inject jitter ($JITTER_SCRIPT) not found\n";
    exit 1;
}

my $SYMPATHY_DIR     = "$ENV{HOME}/.sympathy_3";  # where Sympathy regression files are located
my $SIM_CEILING_FILE = " --ceiling ../devel/sympathy_devel/sympathy_ceiling.sim"; #NR

my $SINKNODE        = '002';  # the heading '00' in '002' is VERY IMPORTANT
my $ITERATIONS      = 1;
my $SIMULATION      = 1;
my $QUIT_AFTER_SUCC = 1;      # if we successfully found a fault/failure, then quit

# Unbuffered STDOUT so progress output appears immediately.
select(STDOUT);
$| = 1;
 42 use POSIX;
 43 use Fcntl;
 44 use FileHandle;
 45 
 # ----------------- define the instance object --------------------
# Instance is a plain blessed hash holding the bookkeeping for one running
# simulation group. Every attribute listed in %vars is reachable through a
# method of the same name: called without arguments it returns the value,
# called with one argument it stores that value and returns it.
package Instance;
our $AUTOLOAD;

# Attribute names and the default values copied into every new object.
my %vars = (
            log_dir => undef,
            sim_group => 'GROUP_UNDEF',
            iter => 1,

            summ_fail_handle => '',
            #sigsegv_handle => '',

            s_iter => 1, # current iteration
            s_node => undef,

            faults => 0,
            failures => 0,
            count => 0,  # status
            previter => -1,

            succ => -1, # highest success status
            quit_timer => 30,
            );

# Constructor; usable as Instance->new() or $existing->new().
# Returns a fresh object carrying the defaults from %vars.
sub new {
    my $me    = shift;
    my $class = ref($me) || $me;
    my $self  = { %vars };
    return bless $self, $class;
}

# Returns the list of attribute names stored in the object.
sub keys {
    my $self = shift;
    # CORE:: makes it explicit that the builtin is meant, not this method.
    return CORE::keys %$self;
}

# Explicit no-op destructor so object destruction never goes through AUTOLOAD.
sub DESTROY { return; }

# Generic accessor behind every attribute method.
# An attribute name that is not part of the object is treated as a
# programming error for reads and writes alike: the error is printed and
# the program exits with status 1.
sub AUTOLOAD {
    my $self = shift;
    ref($self) || die "$self is not an object";
    my $name = $AUTOLOAD;
    $name =~ s/.*://;    # strip the package qualifier

    if (!exists $self->{$name}) {
        print "ERROR: entry object->$name() not found\n";
        exit 1;
    }
    if (@_) {
        return $self->{$name} = shift;
    }
    return $self->{$name};
}


# -----------------------------------------------------------------
package main;
100 
my @INSTANCES;         # Instance objects of the groups in the current iteration
my @FILEHANDLES;       # pipe handles polled by processFileHandles()
my %BUFFER;            # handle => text read from it but not yet consumed

my $INSTANCES=1;       # default num of instances run
my %MESSAGES;          # decoded fault text => SR*_ identifier
my %NODE;              # success/fail info (SRC_NO_DATA, SRC_NODE_FAILED, etc)
my %HANDLE2FILE;       # associate handle with command
my $LOG_SUFFIX='';     # extra information
my $EXIT_TIME=60*60*2; # kill EVERYTHING in 2 hours for safety
my $STDOUT_NO_CR=0;    # for pretty print purpose
my $BZIP2=0;

# exception handling
# SIGSTOP can never be delivered to a handler, so installing one for STOP
# had no effect. TERM (what a plain `kill` sends) is handled instead so the
# spawned processes are cleaned up on termination as well.
# NOTE(review): TERM is presumed to be what the STOP entry was meant to cover.
$SIG{INT}  = \&handler;
$SIG{HUP}  = \&handler;
$SIG{TERM} = \&handler;
my @PID;               # pids of background processes to kill on cleanup
# Kill every recorded background process, then forget all per-run state.
sub killProcesses {
    if (@PID) {
        my $pid_args = join(' ', @PID);
        printw("Cleaning processes ",join(', ',@PID),"...");
        # Go through the shell instead of Perl's kill, because Perl's kill
        # hangs: a polite kill now, a forced one ten seconds later.
        system("(kill $pid_args > /dev/null 2>&1) &");
        system("(sleep 10; kill -9 $pid_args > /dev/null 2>&1) &");
        printw("done\n");
    }
    #foreach my $handle (@FILEHANDLES) {close($handle);}

    # Reset the bookkeeping, emptying the containers one after another.
    %HANDLE2FILE = ();
    %BUFFER      = ();
    @INSTANCES   = ();
    @FILEHANDLES = ();
    @PID         = ();
}
# Signal handler: clean up the child processes and leave with status 1.
sub handler {
    # Take a snapshot first, because killProcesses() empties @PID.
    my $victims = join(' ', @PID);

    killProcesses();

    # Follow up at once with a forced kill of the same pids.
    system("(kill -9 $victims > /dev/null 2>&1) &");
    exit 1;
}
137 
# True (1) when the text buffered for $handle contains a newline anywhere.
sub hasLine {
    my $handle = shift;
    return 1 if $BUFFER{$handle}=~/\n/;
}
# Remove and return the first non-empty line (trailing newline included)
# from the text buffered for $handle. Leading blank lines are discarded.
# Returns the empty string when no complete line is buffered.
sub getLine {
    my($handle)=@_;

    # Get rid of leading new-lines in a single step.
    $BUFFER{$handle} =~ s/^\n+//;

    # Cut the first complete line off the front of the buffer.
    if ($BUFFER{$handle} =~ s/^([^\n]+\n)//) {
        return $1;
    }
    return '';
}
152 
# Drain whatever is currently readable from every handle in @FILEHANDLES
# into %BUFFER, reading at most 100 chunks of up to 1024 bytes per handle.
sub processFileHandles {
    #fcntl($handle, F_SETFL(), O_NONBLOCK());
    foreach my $handle (@FILEHANDLES) {
        my $budget = 100; # safety for switching tasks

        CHUNK:
        while ($budget-- > 0) {
            # IMPORTANT, below is a non-blocking call
            my $got = sysread($handle, my $chunk, 1024);

            # An undefined result ends this handle's turn, whether it means
            # "nothing to read right now" (EAGAIN) or any other error.
            last CHUNK unless defined($got);

            if ($got == 0) {
                # Remote socket closed connection
                warn "Handle closed!\n";
                close($handle);
                last CHUNK;
            }

            $BUFFER{$handle} .= $chunk;
        }
    }
}
184 
# Start $command through the shell with stderr merged into stdout and
# return a non-blocking read handle on its output. The handle is recorded
# in @FILEHANDLES and %HANDLE2FILE once it has been set up successfully.
# Dies when the command cannot be started or the handle flags cannot be set.
sub openAsynchCommand {
    my($command)=@_;
    my $handle = FileHandle->new;

    # Three-argument pipe open: the mode is never taken from $command.
    open($handle, '-|', "$command 2>&1")
        or die "Can't execute $command: $!";
    fcntl($handle, F_SETFL, O_NONBLOCK | O_RDWR | O_NDELAY)
        or die "Couldn't set flags for HANDLE: $!\n";

    # Register only handles that are actually usable.
    $HANDLE2FILE{$handle}=$command;
    push @FILEHANDLES, $handle;
    return $handle;
}
196 
# print() wrapper: in verbose mode, end a pending progress line first.
sub printw {
    if ($STDOUT_NO_CR and $G{verbose}) {
        print "\n";
        $STDOUT_NO_CR = 0;
    }
    print @_;
}
204 
# Append one line to $ENV{LOG_DIR}/sars.log and echo it through printw.
# We open and then close the file on every call to make sure all the logs
# are flushed. Slightly inefficient but simple.
#   $obj  - Instance the line belongs to (not used here)
#   $line - text to log; trailing whitespace is stripped
# Dies when the log file cannot be opened; a failed close is only reported.
sub logw {
    my($obj, $line)=@_;
    $line=~s/\s+$//; # strip trailing space

    my $file="$ENV{LOG_DIR}/sars.log";
    open(my $log, '>>', $file) or die "Can't write to $file: $!";
    print {$log} $line."\n";
    close($log) or warn "Can't close $file: $!\n";

    printw("> $line\n");
}
218 
# Consume every complete line buffered for the instance's summary handle
# and update the instance from it. Always returns 1.
#   $obj        - Instance whose summ_fail_handle is read
#   $timepassed - seconds since the run started (used for log/print only)
sub processSummFail {
    my ($obj, $timepassed) = @_;

    my $handle = $obj->summ_fail_handle;
    return 1 unless $handle;

    LINE:
    while (hasLine($handle)) {
        my $line = getLine($handle);

        # Rows of asterisks are skipped.
        next LINE if $line=~/\*\*\*\*\*\*\*\*/;

        # Header row naming a node and a metric period.
        if ($line=~/Node\s+(\d+).*Metric Pd:\s+(\d+)/) {
            my ($s_node, $s_iter) = ($1, $2);

            # A different period restarts the per-period counters.
            if ($s_iter != $obj->s_iter) {
                $obj->s_iter($s_iter);
                $obj->count(0);
                $obj->failures(0);
                $obj->faults(0);
                if ($G{verbose} && $s_iter!=$obj->previter) {
                    printw("Group:".$obj->sim_group."  time:$timepassed  sink iteration:$s_iter\n");
                    $obj->previter($s_iter);
                }
            }
            $obj->s_node($s_node);
            $obj->count($obj->count+1); # see if we're done with messages
            next LINE;
        }

        # Anything else is a comma separated list of status entries.
        STATUS:
        foreach my $status (split(/,/,$line)) {
            $status=~s/^\s+//;
            $status=~s/\s+$//;
            next STATUS
                unless $status=~/^(.*)\s*(Failure).*Root-Cause: (.+)\(/;

            my ($cat, $ff, $msg) = ($1, $2, $3);
            $msg=~s/\s+$//;
            print "now CHECK msg now: $msg\n";
            $msg=$MESSAGES{$msg};     # decoded text -> identifier
            print "2 now CHECK msg now: $msg\n";
            my $node=$obj->s_node;
            print "CHECK node: $node\n";
            print "CHECK line: $status\ncomp of failure is: $cat, failure: $msg\n";
            print "CHECK cat: $cat\n";
            my $mp = $NODE{$node."_$msg"};
            print "CHECK node_msg: $mp\n";

            next STATUS if $msg eq 'SRC_OK';

            $obj->failures($obj->failures+1);

            # Grade the report: the expected fault on the expected node
            # earns 2 (3 when the category mentions Root), any report on
            # an expected node earns 1, everything else 0.
            my $success_code = ($cat=~/Root/) ? '3' : '2';
            my $success;
            if ($NODE{$node."_$msg"}) {
                $success = $success_code;
            } elsif ($NODE{$node}) {
                $success = '1';
            } else {
                $success = '0';
            }

            # FALSE report! Wrong node!
            if ($success==0 && $msg=~/NODE_FAILED/) {
                $success = -1;
            }

            # record the highest succ level
            $obj->succ($success) if $obj->succ < $success;

            #iter time s_iter node category type correctness
            my $record = sprintf("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s",
                                 $obj->iter, $timepassed,
                                 $obj->s_iter, $node,
                                 $cat, $ff,
                                 $msg,
                                 $success);
            logw($obj, $record);
        }
    }
    return 1;
}
293 
# Turn a node number into its device name: "node" followed by the number
# padded with zeros to at least three digits (2 -> node002).
sub genNode {
    my $number = shift;
    my $name   = sprintf("node%.3d", $number);
    return $name;
}
298 
# Run $cmd in the background through the shell, with its output redirected
# into $ENV{LOG_DIR}. Records the reported pid in @PID and returns it.
#   $cmd     - shell command line
#   $logfile - optional log file name; without it a hidden file named
#              after the last path component of $cmd is used
sub launch {
    my ($cmd, $logfile) = @_;

    if (!$logfile) {
        my $file = (split(/\//, $cmd))[-1];
        $cmd.=" > $ENV{LOG_DIR}/._$file"; # make it hidden
    } else {
        $cmd.=" > $ENV{LOG_DIR}/$logfile";
    }

    # EXECUTE below: the shell backgrounds the command and echoes its pid.
    my $pid=`$cmd 2>&1 & wmpid=\$!; echo \$wmpid`;
    chomp($pid);

    push(@PID, $pid);
    printw("> Launching command: $cmd ($pid)\n");

    return $pid;
}
318 
# Start the background loggers for the sink node of the current SIM_GROUP
# and schedule a forced kill of all of them 5 seconds after $EXIT_TIME.
# The device paths are derived from $SINKNODE throughout, matching the
# /dev/$sinknode/sympathy/summary path processCommand() monitors.
sub log_script {
    my $sinknode = genNode($SINKNODE);
    my $bin      = "$EMSTAR_HOME/obj.i686-linux/bin";
    my $linkdump = "$bin/linkdump";
    my $echocat  = "$bin/echocat";
    my $simdev   = "/dev/sim/group$ENV{SIM_GROUP}/$sinknode";

    # [ command, log file name ] pairs, launched in this order.
    # NOTE(review): "-N 2" and the "2" in the log names presumably stand
    # for the sink node number as well -- confirm before deriving them.
    my @jobs = (
        [ "cat $simdev/emlog/all/all-f",                  "log2" ],
        [ "cat $simdev/tos/logs/error-f",                 "log_error2" ],
        [ "$linkdump -f -l -N 2 -G $ENV{SIM_GROUP} -r -U mote0 -T", "link2" ],
        [ "$echocat -w /dev/$sinknode/sympathy/metrics",  "all_metrics" ],
        [ "$echocat -w /dev/emrun/last_msg",              "last_message" ],
        [ "$echocat -w /dev/$sinknode/sympathy/summary",  "all_summ_fail" ],
    );

    my @pids;
    foreach my $job (@jobs) {
        push(@pids, launch($job->[0], $job->[1]));
    }
 #foreach my $i (1..$NODES) {
 #       if ($i==int($SINKNODE) || $NODESKIP[$i]) {next;}
 #       my $node=genNode($i);
 #       push(@pids, launch("cat /dev/sim/group$ENV{SIM_GROUP}/$node/tos/logs/usr3,usr1-f", "log$i"));
 #   }

    my $pids=join(' ', @pids);
    my $time=$EXIT_TIME+5;

    # Watchdog: a background shell that kills the loggers after $time seconds.
    my $killpid=`(sleep $time; kill -9 $pids) > /dev/null 2>&1 & wmpid=\$!; echo \$wmpid`;
    chomp($killpid);
    push(@PID, $killpid);
    printw("After $time seconds, kill $pids\n");
}
345 
# Central place to put in actions
#
# Execute one queued command for one instance.
#   $obj        - Instance the command applies to
#   $command    - queue entry of the form "<time> <cmd> [<action>]"
#   $timepassed - seconds since the run started (logged only)
# Returns 'exit' for the exit command and an empty string otherwise.
sub processCommand {
    my($obj, $command, $timepassed)=@_;
    my($time,$cmd,$action)=split(/\s+/, $command);
    $ENV{SIM_GROUP}=$obj->sim_group;
    my $simdev="/dev/sim/group$ENV{SIM_GROUP}";

    #NR Log injection of failure
    logw($obj,
           sprintf("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s",
           $obj->iter, $timepassed,
           $obj->s_iter, $action,
           "Inject", "Command", "$cmd".($action?",$action":""), '0'));

    if ($G{verbose}) {printw("*Time:$time executing:$cmd $action (group $ENV{SIM_GROUP})\n");}

    if ($cmd eq 'exit') {
        return 'exit';

    } elsif ($cmd eq 'reboot' || $cmd eq 'die') {
        # write the command word into /dev/.../fault_inject;
        # $action is the node number here
        my $target="$simdev/".genNode($action)."/sympathy/fault_inject";
        system("echo $cmd > $target");
        if ($?) {printw("Problem injecting '$cmd' into $target\n");}

    } elsif ($cmd eq 'traffic') {
        my $period=$action*10;   # Convert sec to 100ms units
        my $shell="echo 'id=1:sensors=33:period=$period' > $simdev/".genNode($SINKNODE)."/dse/query";
        printw("Injecting traffic with command:\n$shell\n");
        system($shell);
        if ($?) {printw("Problem injecting 'traffic' \n");}

    } elsif ($cmd eq 'jitter') {
        my $shell="$JITTER_SCRIPT";
        printw("Injecting jitter with command:\n$shell\n");
        system($shell);
        if ($?) {printw("Problem injecting 'jitter' \n");}

    } elsif ($cmd eq 'halt') {
        my $node=genNode($action);
        print "GOING TO HALT NODE!\n";
        system("echo halt > $simdev/$node/emrun/command");
        if ($?) {printw("Problem injecting 'halt' into $node\n");}

    } elsif ($cmd eq 'send' || $cmd eq 'recv') {
        # $action is "<node number>,<prob>"; only the number names the node
        my($number,$prob)=split(/,/, $action);
        my $node=genNode($number);
        my $target="$simdev/$node/sympathy/fault_inject";
        system("echo $cmd:node=$node:prob=$prob > $target");
        if ($?) {printw("Problem injecting '$cmd' into $target\n");}

    } elsif ($cmd eq 'monitor') {
        # start reading summ_fail
        if ($obj->summ_fail_handle) {
            warn "Already parsing summ_fail! Command ignored\n";
            return '';
        }
        my $sinknode=genNode($SINKNODE);

        log_script();

        my $handler = openAsynchCommand("$EMSTAR_HOME/obj.i686-linux/bin/echocat -w /dev/$sinknode/sympathy/summary");
        $obj->summ_fail_handle($handler);

        #my $log_dir=$obj->log_dir;
        #my $handler2 = openAsynchCommand("tail -f $log_dir/emrun.log | grep SIG");
        #$obj->sigsegv_handle($handler2);

        # echo non-sense into nodes (try to kick them start)
        printw("Checking if all the nodes are up and running...\n");
        foreach my $i (1..$NODES) {
            if ($i==int($SINKNODE) || $NODESKIP[$i]) {next;}
            my $node=genNode($i);
            system("echo WAKE > $simdev/$node/sympathy/fault_inject");
            if ($?) {printw("\tWARNING: Node $i is not ready\n");}
        }

    } else {
        warn "Command ($cmd, $action) not understood\n";
    }
    return '';
}
427 
428 
429 
# ------------------ program initialization --------------------
# Make sure the working directories exist and locate the EmStar tree.
system("mkdir -p /tmp/$ENV{USER}");
die "You need to setup the HOME environment\n" unless $ENV{HOME};
system("mkdir -p $SYMPATHY_DIR") unless -e $SYMPATHY_DIR;

unless (-e $EMSTAR_HOME) {
    # Fall back to the EMSTAR_HOME environment variable.
    die "Can't find $EMSTAR_HOME, please set EMSTAR_HOME\n"
        unless $ENV{EMSTAR_HOME};
    die "Can't find $ENV{EMSTAR_HOME}, please re-set EMSTAR_HOME\n"
        unless -e $ENV{EMSTAR_HOME};
    $EMSTAR_HOME = $ENV{EMSTAR_HOME};
}

# The metrics period comes from Sympathy.h, where it is given in msec.
my $line = `grep METRICS_PERIOD $EMSTAR_HOME/tos-contrib/sympathy/tos/lib/Sympathy.h`
    or die "Can't open $EMSTAR_HOME/tos-contrib/sympathy/tos/lib/Sympathy.h";

die "Can't find METRICS_PERIOD in Sympathy.h"
    unless $line=~/METRICS_PERIOD_MSEC\s+(\d+)/;
$METRICS_PERIOD    = $1/1000;
$TRACK_FAIL_PERIOD = $METRICS_PERIOD * $epoch;
print "CHECK metrics-period: $METRICS_PERIOD, track-fail: $TRACK_FAIL_PERIOD\n";
445 
# process arguments
# Without any argument, print the usage guide and stop. The text mirrors
# what the argument parser below actually accepts: fault options take only
# their target (their time is randomized), -traffic takes <sec>:<period>.
if (!@ARGV) {
    my $default_exit = $METRICS_PERIOD * 10;
    printw(<<EOL);
Sympathy Automated Regression System (sars) Usage Guide:
> sars.pl [options below]
  -sim_group <num>         Simulation group of the first instance. Required
                           unless SIM_GROUP is set in the environment.
  -instances <num>         Number of EmStars to run simultaneously. [default=1]
  -iter <num>              Number of times to run/generate data [default=1]
  -ceiling                 Run with ceiling cfg file:\n\t$SIM_CEILING_FILE
  -monitor <sec>           Start reading /dev/*/files after <sec>
  -die <node_num>          After random time, node <node_num> radio dies
  -halt <node_num>         After random time, node <node_num> *HALTS* completely
  -reboot <node_num>       After random time, reboot node <node_num>
  -send node=<n>,prob=<p>  After random time, inject a 'send' fault with
                           prob=<p> into node <n>
  -recv node=<n>,prob=<p>  After random time, inject a 'recv' fault with
                           prob=<p> into node <n>
  -jitter <sec1>           After <sec1>, disable jitter on nodes (to force
                           congestion on Sympathy traffic)
  -epoch <num>             #metrics-pd per failure-epoch
  -traffic <sec>:<sec2>    After <sec>, inject dse-traffic w/ period sec2
  -keepgoing               If this is specified, simulation will keep running
                           until time is up. If this is not specified, then
                           simulation will quit 30 seconds after it has
                           successfully detected fault/failure.
  -exit <sec>              After <sec> exit. By default exit after $default_exit seconds
  -verbose                 Print time
  -kill                    Kill previous runs
  -bzip2                   Use bzip2 to compress logs
Example: sars.pl -sim_group 95 -jitter 150 -monitor 151 -die 7 -exit 2000 -verbose
Example: sars.pl -sim_group 95 -jitter 150 -monitor 151 -send node=3,prob=70 \\
                 -recv node=3,prob=70 -exit 2000 -verbose
EOL
    exit 0;
}
476  
# parse Arguments
# Every option starts with '-'. Timed commands are appended to
# @COMMAND_QUEUE as "<sec> <cmd> [<arg>]" in command-line order; expected
# faults are recorded in %NODE and folded into $LOG_SUFFIX.
# NOTE(review): the queue is consumed strictly front to back, so commands
# should be given in increasing time order -- confirm before sorting here.
my @COMMAND_QUEUE;
my $_prevsec=0;

# Options that consume the next command-line word as their value.
my %takes_value = map { $_ => 1 }
    qw(sim_group iter instances epoch
       jitter traffic monitor die halt recv send reboot exit);

for (my $i=0; $i<=$#ARGV; $i++) {
    my $arg=$ARGV[$i];
    if ($arg!~s/^\-//) {die "Argument $arg not recognized";}

    # Fetch the option's value up front so a missing one is reported
    # instead of silently becoming undef.
    my $value;
    if ($takes_value{$arg}) {
        if ($i >= $#ARGV) {die "Option -$arg requires a value\n";}
        $value=$ARGV[++$i];
    }

    if ($arg eq 'kill') {
        system("ps auxwww|grep $ENV{USER}|egrep '(emrun|emsim|sympathy_sink|sympathy_app|wmpid)' | awk '{print \$2}'|xargs kill -9");
        printw("All Sympathy processes should be killed\n");
        exit 0;
    } elsif ($arg eq 'bzip2') {
        $BZIP2=1;
    } elsif ($arg eq 'sim_group') {
        if ($value!~/^\d+$/) {die "SIM_GROUP must be between 0-99";}
        $ENV{SIM_GROUP}=$value;
    } elsif ($arg eq 'iter') {
        $ITERATIONS=$value;
    } elsif ($arg eq 'instances') {
        $INSTANCES=$value;
    } elsif ($arg eq 'keepgoing') {
        $QUIT_AFTER_SUCC=0;
    } elsif ($arg eq 'epoch') {
        $epoch=$value;
        #Re-calculate the track-fail-period!
        $TRACK_FAIL_PERIOD = $METRICS_PERIOD * $epoch;
    } elsif ($arg eq 'ceiling') {
        $SIM_FILE = $SIM_CEILING_FILE;
        $SIMULATION = 0;
        printw("New sim-file: $SIM_FILE\n");
    } elsif ($arg =~ /^(?:jitter|traffic|monitor|die|halt|recv|send|reboot|exit)$/) {
        my $cmd=$arg;
        my($sec,$param)=split(':',$value);

        if ($cmd =~ /^(?:die|halt|recv|send|reboot)$/) {
            #Randomize time failure is injected! The value given on the
            # command line is the target, not a time.
            $param = $sec;

            # rand within stats period cuz we check every stats pd for failure
            $sec = int(rand($METRICS_PERIOD));

            #This is done to get rid of fact that sympathy waits for
            # epoch*stats-pd in the beginning. And that the network
            # takes some time to form.
            print "sec was $sec\n";
            #$sec += (3 * $TRACK_FAIL_PERIOD);
            $sec += $INITIAL_DELAY;
            print "sec is $sec\n";
        } elsif (!defined($sec) || $sec!~/^\d+(?:\.\d+)?$/) {
            # jitter/traffic/monitor/exit carry an explicit time
            printw("$cmd: time (".(defined($sec)?$sec:'').") must be a number of seconds\n");
            exit 1;
        }

        $_prevsec = $sec;

        if ($cmd eq 'monitor') {
            push(@COMMAND_QUEUE, "$sec $cmd");
        } elsif ($cmd eq 'exit') {
            push(@COMMAND_QUEUE, "$sec $cmd");
            $EXIT_TIME=$sec;
        } elsif ($cmd eq 'traffic') {
            # param is the period
            if ($param!~/^\d+/) {
                printw("$cmd: Second argument period ($param) must be a number\n");
                exit 1;
            }
            push(@COMMAND_QUEUE, "$sec $cmd $param");
            $LOG_SUFFIX.="$sec$cmd$param.";
        } elsif ($cmd eq 'jitter') {
            push(@COMMAND_QUEUE, "$sec $cmd");
            $LOG_SUFFIX.="$sec$cmd.";
        } elsif ($cmd eq 'die' || $cmd eq 'halt') {
            # param is node number
            if ($param!~/^\d+$/) {
                printw("Second argument node ($param) must be a number\n");
                exit 1;
            }
            push(@COMMAND_QUEUE, "$sec $cmd $param");
            $NODE{$param."_SRC_NODE_FAILED"}=1;
            $NODE{$param}=1;
            my $tmp = $NODE{$param."_SRC_NODE_FAILED"};
            print "for arg: $param, msg: _SRC_NODE_FAILED: $tmp\n";
            $LOG_SUFFIX.="$sec$cmd$param.";
        } elsif ($cmd eq 'send' || $cmd eq 'recv') {
            my($node,$prob);
            if ($param=~/node=(\d+),prob=(\d+)/i) {
                $node=$1;
                $prob=$2;
                push(@COMMAND_QUEUE, "$sec $cmd $node,$prob");

            } else {
                die "Sorry, send/recv accepts in the format of -send 500:node=5,prob=80";
            }
            $NODE{$node."_SRC_INSUFFICIENT_DATA"}=1;
            $NODE{$node."_SRC_NODE_FAILED"}=1;
            # look the entry up under the node number it was stored with
            my $tmp = $NODE{$node."_SRC_NODE_FAILED"};
            print "for arg: $param, msg: _SRC_NODE_FAILED: $tmp\n";
            $NODE{$node}=1;
            $LOG_SUFFIX.="$sec$cmd$param.";

        } elsif ($cmd eq 'reboot') {
            # param is node number
            if ($param!~/^\d+$/) {
                printw("Second argument node ($param) must be a number\n");
                exit 1;
            }
            push(@COMMAND_QUEUE, "$sec $cmd $param");
            # setup the node failure
            $NODE{$param."_SRC_NODE_REBOOTED"}=1;
            $NODE{$param}=1;
            $LOG_SUFFIX.="$sec$cmd$param.";
        }
    } elsif ($arg eq 'verbose') {
        $G{verbose}=1;
    } else {
        warn "Argument '$arg' not understood.";
        exit 1;
    }
}
593 
# The epoch is passed along after the sim file name and recorded in the
# log directory suffix.
$SIM_FILE.=" $epoch";
$LOG_SUFFIX.="epoch$epoch.";

# put in an exit command if the last one isn't already exit
if (!@COMMAND_QUEUE || $COMMAND_QUEUE[-1] !~ /exit/) {
    push(@COMMAND_QUEUE, ($METRICS_PERIOD*10)." exit");
    $EXIT_TIME=($METRICS_PERIOD*10);
}
if ($EXIT_TIME==0) {die "Exit time not specified!\n";}
# Group 0 is inside the advertised 0-99 range, so test for a numeric value
# rather than for truth.
if (!defined($ENV{SIM_GROUP}) || $ENV{SIM_GROUP}!~/^\d+$/) {
    die "You need to set SIM_GROUP (or specify -sim_group between 0 to 99)";
}


printw(("="x75)."\n");
printw("Command review:\n");
foreach my $cmd (@COMMAND_QUEUE) {
    printw("\t$cmd\n");
}
# %NODE holds "<node>_<FAULT>" keys for specific expectations and bare
# "<node>" keys for "any report on this node".
foreach my $node (sort (keys %NODE)) {
    if ($node =~ s/^(\d+)_//) {
        printw("Expecting fault $node for node $1\n");
    } else {
        printw("Expecting 'something' for node $node\n");
    }
}


# Get a text to ID translation: every `SR?_XXX ) return "text"` line of
# sympathy_decode.c maps the returned text to its identifier.
$MESSAGES{"Unrecognized Fault"}="UNRECOGNIZED";
my $decode_file="$EMSTAR_HOME/devel/sympathy/libsympathy/sympathy_decode.c";
open(my $decode_fh, '<', $decode_file) ||
    die "Can't find $decode_file: $!";
while(my $line=<$decode_fh>) {
    if ($line=~/(SR._\w+)\s*\)\s+return\s+\"([^\"]+)\"/) {
        $MESSAGES{$2}=$1;
    }
}
close($decode_fh);
629 
# Start running the test here!
# For every iteration: start one emsim per simulation group, replay the
# command queue against all instances once per second while parsing the
# sink summaries, then collect packet statistics and clean up.
my @_CMD=@COMMAND_QUEUE;
my($_from, $_to)=($ENV{SIM_GROUP}, $ENV{SIM_GROUP}+$INSTANCES-1);
foreach my $iter (1..$ITERATIONS) {
    @COMMAND_QUEUE=@_CMD;
    printw("\n");
    printw("Running iteration $iter\n");

    # execute all the emrun command/instances asynchronously
    foreach my $group ($_from..$_to) {
        $ENV{SIM_GROUP}=$group;
        my $type=$SIMULATION?'sim':'emu';

        # Pick the first run letter whose log directory does not exist yet;
        # if a..z are all taken, the 'z' directory is reused.
        foreach my $count ('a'..'z') {
            $ENV{LOG_DIR}="$SYMPATHY_DIR/group$ENV{SIM_GROUP}.${LOG_SUFFIX}iter$iter$count.$type";
            last if !-e $ENV{LOG_DIR};
        }
        if (-e $ENV{LOG_DIR}) {
            print "WARNING: Directory $ENV{LOG_DIR} already exists! Over-writing...\n";
        }
        system("mkdir -p $ENV{LOG_DIR}");
        unlink("$ENV{LOG_DIR}/sars.log");

        printw("Log dir: $ENV{LOG_DIR}\n");
        if (!-e "./emrun/emsim") {die "Can't find ./emrun/emsim";}
        my $empid=launch("./emrun/emsim $SIM_FILE", "emrun.log");

        # Watchdog that force-kills emsim shortly after the exit time.
        # launch() has already recorded $empid in @PID.
        my $time=$EXIT_TIME+5;
        my $pid2=`(sleep $time; kill -9 $empid) > /dev/null 2>&1 & wmpid=\$!; echo \$wmpid`;
        chomp($pid2);
        push(@PID, $pid2);

        my $obj = Instance->new();
        $obj->log_dir($ENV{LOG_DIR});
        $obj->sim_group($group);
        $obj->iter($iter);
        push(@INSTANCES, $obj);

        logw($obj, "#h Run-Iter Timepassed TestIter Node-id Component Type Failure Correct");
        logw($obj, "# ($ENV{LOG_DIR}) ".`date`);
    }

    # loop here
    my $starttime=time;
    my $repeat=1;
    while ($repeat) {
        processFileHandles();
        my $timepassed=time-$starttime;

        if ($G{verbose}) {
            if ($timepassed%60==0) {printf("%d min",int($timepassed/60));}
            $STDOUT_NO_CR=1;
            print ".";
        }

        # process commands after time has passed
        while ($#COMMAND_QUEUE!=-1 &&
               (split(/\s+/, $COMMAND_QUEUE[0]))[0] <= $timepassed) {
            my $cmd=shift(@COMMAND_QUEUE);
            foreach my $obj (@INSTANCES) {
                $ENV{LOG_DIR}=$obj->log_dir;
                $ENV{SIM_GROUP}=$obj->sim_group;
                if (processCommand($obj, $cmd, $timepassed) eq 'exit') {
                    $repeat=0;
                }
            }
        }

        # parse the lines
        my $quits=$#INSTANCES+1;   # instances that have not finished yet
        foreach my $obj (@INSTANCES) {
            $ENV{LOG_DIR}=$obj->log_dir;
            $ENV{SIM_GROUP}=$obj->sim_group;
            if (!processSummFail($obj, $timepassed)) {
                printw("FATAL ERROR DETECTED, resetting...\n");
                $repeat=0;
                last;
            }
            if ($QUIT_AFTER_SUCC) {
                # Once the required success level is reached, count the
                # instance's quit timer down by one per pass.
                if ($obj->succ>=$SUCCESS) {
                    $obj->quit_timer($obj->quit_timer - 1);
                    system("touch $ENV{LOG_DIR}/SUCC");
                }
                if ($obj->quit_timer<=0) {
                    $quits--;
                    # one time user notification
                    if ($obj->quit_timer==0) {
                        printw("SUCC: group ".$obj->sim_group." successfully detected failure for a while.\n");
                    }
                }
            }
        }
        if ($quits<=0) {
            printw("SUCC: Every instance has detected failure, next run...\n");
            $repeat=0;
        }
        sleep 1;
    }
    # get network summary
    foreach my $obj (@INSTANCES) {
        $ENV{SIM_GROUP}=$obj->sim_group;
        foreach my $i (1..$NODES) {
            if ($i==int($SINKNODE) || $NODESKIP[$i]) {next;}
            my $node=genNode($i);
            my @summary=`cat /dev/sim/group$ENV{SIM_GROUP}/$node/sympathy/fault_inject`;

            # First line: four counters printed with a "B" suffix below;
            # second line: four counters printed with a "#" suffix.
            my($symp_tx,$symp_rx,$other_tx,$other_rx);
            if ($summary[0]=~/Sympathy\D+(\d+)\D+(\d+)\D+(\d+)\D+(\d+)/) {
                ($symp_tx,$symp_rx,$other_tx,$other_rx)=($1,$2,$3,$4);
            } else {
                printw("Error, can't get packet statistics from $ENV{SIM_GROUP} $node: '$summary[0]'\n");
            }
            if ($summary[1]=~/Sympathy\D+(\d+)\D+(\d+)\D+(\d+)\D+(\d+)/) {
                printw("Packet ($ENV{SIM_GROUP} $node send/recv) Sympathy:$symp_tx/${symp_rx}B $1/$2#  Other:$other_tx/${other_rx}B $3/$4#\n");
            }
        }
    }


    killProcesses();

    if ($BZIP2) {
        system("bzip2 $SYMPATHY_DIR/group*/log? $SYMPATHY_DIR/group*/log?? $SYMPATHY_DIR/group*/emrun.log");

    }
    #if ($iter!=$ITERATIONS) {
    # killProcesses() sends its forced kill 10 seconds after the polite one.
    printw("Waiting for processes to really end...");
    sleep 12;
    printw("done\n");
    #}
}
printw("Sars all done, exiting.\n");
exit 0;

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.