1 #!/usr/bin/perl
2 #
3 # sars - Sympathy Automated Regression System
4
5 use strict;
6
# Global settings -- change these to accommodate your own environment.
my (%G);

# Default variable settings.

# Level that is considered a "success":
#   3 = correct root-cause and correct localization
#   2 = correct root-cause, but not necessarily the localization
#   1 = some root-cause reported at the node
my $SUCCESS = 3;

my $epoch             = 3;
my $METRICS_PERIOD    = 30;
my $TRACK_FAIL_PERIOD = $METRICS_PERIOD * $epoch;
my $INITIAL_DELAY     = 15 * 60;

my $EMSTAR_HOME = "$ENV{HOME}/cvs/emstar_test/emstar";

my $JITTER_SCRIPT = "$ENV{HOME}/bin/inject_jitter";
my ($SIM_FILE, $NODES, @NODESKIP);
$NODES = 30;

# The jitter helper is mandatory; refuse to start without it.
unless (-e $JITTER_SCRIPT) {
    print "Script to inject jitter ($JITTER_SCRIPT) not found\n";
    exit 1;
}

# $SIM_FILE="./sympathy_sim.sim";
$SIM_FILE = "../devel/sympathy_devel/testtabs/sympathy_sim.sim";

# Where Sympathy regression files are located.
my $SYMPATHY_DIR = "$ENV{HOME}/.sympathy_3";

# NR: the leading blank and "--ceiling" are part of the value on purpose,
# because the string is handed to emsim as-is.
my $SIM_CEILING_FILE = " --ceiling ../devel/sympathy_devel/sympathy_ceiling.sim";

my $SINKNODE        = '002';    # the heading '00' in '002' is VERY IMPORTANT
my $ITERATIONS      = 1;
my $SIMULATION      = 1;
my $QUIT_AFTER_SUCC = 1;        # quit once a fault/failure was successfully found

# Unbuffered STDOUT so progress output shows up immediately.
select(STDOUT);
$| = 1;
42 use POSIX;
43 use Fcntl;
44 use FileHandle;
45
46 # ----------------- define the instance object --------------------
{
    # Instance: per-simulation-group state record.
    #
    # Every key of %vars below is reachable as a combined getter/setter
    # ($obj->field reads, $obj->field($value) writes) through AUTOLOAD.
    package Instance;
    use vars qw($AUTOLOAD);

    # Default field values copied into every new object.
    my %vars = (
        log_dir   => undef,
        sim_group => 'GROUP_UNDEF',
        iter      => 1,

        summ_fail_handle => '',
        #sigsegv_handle => '',

        s_iter => 1,        # current iteration
        s_node => undef,

        faults   => 0,
        failures => 0,
        count    => 0,      # status
        previter => -1,

        succ       => -1,   # highest success status
        quit_timer => 30,
    );

    # Constructor; works as a class method or on an existing object.
    sub new {
        my ($proto) = @_;
        my $class = ref($proto) || $proto;
        return bless { %vars }, $class;
    }

    # Returns the field names currently stored in the object.
    sub keys {
        my ($self) = @_;
        return (CORE::keys %$self);
    }

    # Generic accessor:
    #   - called with an argument: stores it under the method name and returns it
    #   - called without: returns the stored value if the field exists
    #   - DESTROY is swallowed quietly
    #   - anything else is a programming error and terminates the program
    sub AUTOLOAD {
        my $self = shift;
        ref($self) || die "$self is not an object";

        (my $field = $AUTOLOAD) =~ s/.*://;

        return $self->{$field} = shift if @_;
        return $self->{$field} if exists $self->{$field};
        return undef if $field eq 'DESTROY';

        print "ERROR: entry object->$field() not found\n";
        exit 1;
        #return undef;
    }
}
96
97
98 # -----------------------------------------------------------------
99 package main;
100
# Shared state of the main program.
my @INSTANCES;              # Instance objects of the current iteration
my @FILEHANDLES;            # non-blocking pipes polled by processFileHandles()
my %BUFFER;                 # per-handle text read so far but not yet consumed

my $INSTANCES = 1;          # default num of instances run
my %MESSAGES;               # mapping between STF_OK and "OK"
my %NODE;                   # success/fail info (SRC_NO_DATA, SRC_NODE_FAILED, etc)
my %HANDLE2FILE;            # associate handle with command
my $LOG_SUFFIX = '';        # extra information
my $EXIT_TIME = 60*60*2;    # kill EVERYTHING in 2 hours for safety
my $STDOUT_NO_CR = 0;       # for pretty print purpose
my $BZIP2 = 0;

# Exception handling: clean up the child processes when we are interrupted.
# SIGSTOP cannot be caught, so trapping it was a no-op; SIGTERM (what a
# plain `kill` sends) is trapped instead so children are not left behind.
$SIG{INT}  = \&handler;
$SIG{HUP}  = \&handler;
$SIG{TERM} = \&handler;
my @PID;                    # pids of every background process we started
# Terminates every recorded background process and forgets all
# per-iteration bookkeeping (pids, handles, instances, buffers).
sub killProcesses {
    if (@PID) {
        my $pid_list = join(' ', @PID);
        printw("Cleaning processes ", join(', ', @PID), "...");
        # Shell out instead of using Perl's kill because Perl's kill hangs:
        # polite kill now, forced kill ten seconds later, both in background.
        system("(kill $pid_list > /dev/null 2>&1) &");
        system("(sleep 10; kill -9 $pid_list > /dev/null 2>&1) &");
        printw("done\n");
    }
    #foreach my $handle (@FILEHANDLES) {close($handle);}
    %HANDLE2FILE = ();
    %BUFFER      = ();
    @INSTANCES   = ();
    @FILEHANDLES = ();
    @PID         = ();
}
# Signal handler: clean up the child processes and leave with status 1.
sub handler {
    # Take the pid list first -- killProcesses() empties @PID.
    my $victims = join(' ', @PID);
    killProcesses();
    # Do not wait for the delayed kill -9; force it right away.
    system("(kill -9 $victims > /dev/null 2>&1) &");
    exit 1;
}
137
# Tells whether the buffer of $handle holds at least one newline.
# Returns 1 if so, 0 otherwise (including when nothing was buffered yet).
sub hasLine {
    my ($handle) = @_;
    my $pending = $BUFFER{$handle};
    return 0 unless defined $pending;
    return ($pending =~ /\n/) ? 1 : 0;
}
# Removes and returns the next complete line (newline included) from the
# buffer of $handle.  Blank lines at the head of the buffer are discarded.
# Returns '' when no complete line is buffered; a trailing partial line
# stays in the buffer until its newline arrives.
sub getLine {
    my ($handle) = @_;
    return '' unless defined $BUFFER{$handle};

    # Get rid of leading new-lines!
    $BUFFER{$handle} =~ s/^\n+//;

    my $eol = index($BUFFER{$handle}, "\n");
    return '' if $eol < 0;

    # 4-argument substr cuts the line out of the buffer and returns it.
    return substr($BUFFER{$handle}, 0, $eol + 1, '');
}
152
# Drains whatever is currently readable from every handle in @FILEHANDLES
# into %BUFFER without blocking.
sub processFileHandles {
    #fcntl($handle, F_SETFL(), O_NONBLOCK());
    my $chunk;
    for my $fh (@FILEHANDLES) {
        my $got      = 1024;    # primed non-zero so the loop is entered
        my $attempts = 100;     # safety for switching tasks
        while ($got && $attempts-- > 0) {
            # IMPORTANT, below is a non-blocking call
            $got = sysread($fh, $chunk, 1024);

            if (!defined($got)) {
                my $errno = $!;
                # Anything but EAGAIN is a real error: give up on this handle.
                last if !($errno == EAGAIN());
                # EAGAIN: nothing to read right now.  $got is undef, so the
                # loop condition ends the loop for this handle.
                #print "Stat:$stat\n";
                next;
            }

            if ($got == 0) {
                # Remote side closed the connection
                warn "Handle closed!\n";
                close($fh);
                last;
            }

            $BUFFER{$fh} .= $chunk;
        }
    }
}
184
# Starts $command through the shell with stderr merged into stdout and
# returns a non-blocking read handle on its output.  The handle is added
# to @FILEHANDLES (polled by processFileHandles) and %HANDLE2FILE.
# Dies if the command cannot be started or the handle flags cannot be set.
sub openAsynchCommand {
    my ($command) = @_;
    my $handle = FileHandle->new;

    #open($handle, "$ENV{HOME}/bin/loop 2>&1 |");
    # 3-argument pipe open: the mode cannot be altered by the command text.
    open($handle, '-|', "$command 2>&1")
        or die "Can't execute $command: $!";
    fcntl($handle, F_SETFL, O_NONBLOCK | O_RDWR | O_NDELAY)
        or die "Couldn't set flags for HANDLE: $!\n";

    # Register the handle only once it is really usable.
    $HANDLE2FILE{$handle} = $command;
    push @FILEHANDLES, $handle;
    return $handle;
}
196
# print() wrapper that first terminates a pending line of verbose
# progress dots, so the message starts on a line of its own.
sub printw {
    my $finish_dots = $G{verbose} && $STDOUT_NO_CR;
    if ($finish_dots) {
        $STDOUT_NO_CR = 0;
        print "\n";
    }
    print @_;
}
204
# Appends $line (trailing whitespace stripped) to $ENV{LOG_DIR}/sars.log
# and echoes it on the console prefixed with "> ".
# We open and then close the file on every call to make sure all the logs
# are flushed.  Slightly inefficient but simple.
# $obj is accepted for symmetry with the callers but is not used here.
# Dies if the log file cannot be opened for appending.
sub logw {
    my ($obj, $line) = @_;
    $line =~ s/\s+$//;    # strip trailing space

    my $file = "$ENV{LOG_DIR}/sars.log";
    open(my $log, '>>', $file) or die "Can't write to $file: $!";
    print {$log} $line . "\n";
    # Buffered write errors only surface at close; report but keep running.
    close($log) or warn "Problem closing $file: $!\n";

    printw("> $line\n");
}
218
# Consumes every complete line buffered for the instance's summary handle
# and scores each reported failure against the faults we injected (%NODE).
#   $obj        - Instance whose summ_fail_handle is parsed
#   $timepassed - seconds since the iteration started (logged only)
# Always returns 1.
sub processSummFail {
    my ($obj, $timepassed) = @_;
    my $fh = $obj->summ_fail_handle;
    return 1 unless $fh;

    while (hasLine($fh)) {
        my $line = getLine($fh);

        # Separator rows carry no information.
        next if $line =~ /\*\*\*\*\*\*\*\*/;

        # Header row: which node and which metric period follows.
        if ($line =~ /Node\s+(\d+).*Metric Pd:\s+(\d+)/) {
            my ($s_node, $s_iter) = ($1, $2);
            if ($s_iter != $obj->s_iter) {
                # A new sink iteration started: reset the per-iteration counters.
                $obj->s_iter($s_iter);
                $obj->count(0);
                $obj->failures(0);
                $obj->faults(0);
                if ($G{verbose} && $s_iter != $obj->previter) {
                    printw("Group:".$obj->sim_group." time:$timepassed sink iteration:$s_iter\n");
                    $obj->previter($s_iter);
                }
            }
            $obj->s_node($s_node);
            $obj->count($obj->count + 1);    # see if we're done with messages
            next;
        }

        # Anything else is a comma-separated list of status entries.
        for my $status (split(/,/, $line)) {
            $status =~ s/^\s+//;
            $status =~ s/\s+$//;
            next unless $status =~ /^(.*)\s*(Failure).*Root-Cause: (.+)\(/;

            my ($cat, $ff, $msg) = ($1, $2, $3);
            $msg =~ s/\s+$//;
            print "now CHECK msg now: $msg\n";
            $msg = $MESSAGES{$msg};    # text -> symbolic id
            print "2 now CHECK msg now: $msg\n";
            my $node = $obj->s_node;
            print "CHECK node: $node\n";
            print "CHECK line: $status\ncomp of failure is: $cat, failure: $msg\n";
            print "CHECK cat: $cat\n";
            my $mp = $NODE{$node."_$msg"};
            print "CHECK node_msg: $mp\n";

            next if $msg eq 'SRC_OK';

            $obj->failures($obj->failures + 1);

            # Expected fault at this node scores 3 when the category mentions
            # "Root", else 2; a different fault at an expected node scores 1;
            # everything else scores 0.
            my $success_code = ($cat =~ /Root/) ? '3' : '2';
            # if ($cat=~/Node/i) {
            my $success;
            if ($NODE{$node."_$msg"}) {
                $success = $success_code;
            }
            elsif ($NODE{$node}) {
                $success = '1';
            }
            else {
                $success = '0';
            }
            #}

            # FALSE report! Wrong node!
            $success = -1 if $success == 0 && $msg =~ /NODE_FAILED/;

            # record the highest succ level
            $obj->succ($success) if $obj->succ < $success;

            #iter time s_iter node category type correctness
            my $record = sprintf("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s",
                                 $obj->iter, $timepassed,
                                 $obj->s_iter, $node,
                                 $cat, $ff,
                                 $msg,
                                 $success);
            logw($obj, $record);
        }
    }
    return 1;
}
293
# Builds the device-tree name of a node: "node" followed by the node
# number padded with zeros to at least three digits.
sub genNode {
    my $number = shift;
    return 'node' . sprintf("%.3d", $number);
}
298
# Runs $cmd in the background with its output redirected into
# $ENV{LOG_DIR}, records the pid in @PID and returns it.
#   $cmd     - shell command line
#   $logfile - optional log file name; when absent a hidden file named
#              after the last path component of the command is used
sub launch {
    my ($cmd, $logfile) = @_;

    my $target = $logfile;
    if (!$target) {
        my @parts = split(/\//, $cmd);
        $target = "._" . $parts[$#parts];    # make it hidden
    }
    $cmd .= " > $ENV{LOG_DIR}/$target";

    # EXECUTE below: background the command and let the shell report its pid.
    my $pid = `$cmd 2>&1 & wmpid=\$!; echo \$wmpid`;
    chomp($pid);

    push(@PID, $pid);

    printw("> Launching command: $cmd ($pid)\n");
    return $pid;
}
318
# Starts the background loggers for the sink node (emlog, error log, link
# dump, metrics, last message, summary) and a watchdog that kill -9s them
# $EXIT_TIME+5 seconds from now.
sub log_script {
    my $sinknode = genNode($SINKNODE);
    my $bindir   = "$EMSTAR_HOME/obj.i686-linux/bin";
    my $linkdump = "$bindir/linkdump";
    my $echocat  = "$bindir/echocat";
    my $simnode  = "/dev/sim/group$ENV{SIM_GROUP}/$sinknode";

    my @pids;
    push(@pids, launch("cat $simnode/emlog/all/all-f", "log2"));
    push(@pids, launch("cat $simnode/tos/logs/error-f", "log_error2"));

    # The /dev/<sink> paths are derived from $SINKNODE like everywhere else
    # in this file instead of repeating "node002" literally.
    # NOTE(review): "-N 2" is kept literally; it presumably also names the
    # sink node -- confirm against linkdump's usage before parameterizing.
    push(@pids,
         launch("$linkdump -f -l -N 2 -G $ENV{SIM_GROUP} -r -U mote0 -T", "link2"),
         launch("$echocat -w /dev/$sinknode/sympathy/metrics", "all_metrics"),
         launch("$echocat -w /dev/emrun/last_msg", "last_message"),
         launch("$echocat -w /dev/$sinknode/sympathy/summary", "all_summ_fail"));
    #foreach my $i (1..$NODES) {
    #    if ($i==int($SINKNODE) || $NODESKIP[$i]) {next;}
    #    my $node=genNode($i);
    #    push(@pids, launch("cat /dev/sim/group$ENV{SIM_GROUP}/$node/tos/logs/usr3,usr1-f", "log$i"));
    #}

    my $pids = join(' ', @pids);
    my $time = $EXIT_TIME + 5;

    # Watchdog: make sure the loggers never outlive the run.
    my $killpid = `(sleep $time; kill -9 $pids) > /dev/null 2>&1 & wmpid=\$!; echo \$wmpid`;
    chomp($killpid);
    push(@PID, $killpid);
    printw("After $time seconds, kill $pids\n");
}
345
# Central place to put in actions.
#
# Executes one queued command against one simulation instance.
#   $obj        - Instance the command applies to
#   $entry      - queue entry of the form "<time> <command> [<argument>]"
#   $timepassed - seconds since the iteration started (logged only)
# Returns 'exit' for the exit command and a false value for everything else.
sub processCommand {
    my ($obj, $entry, $timepassed) = @_;
    my ($time, $cmd, $action) = split(/\s+/, $entry);
    $ENV{SIM_GROUP} = $obj->sim_group;
    my $simdev = "/dev/sim/group$ENV{SIM_GROUP}";

    #NR Log injection of failure
    logw($obj,
         sprintf("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s",
                 $obj->iter, $timepassed,
                 $obj->s_iter, $action,
                 "Inject", "Command", "$cmd".($action?",$action":""), '0'));

    if ($G{verbose}) {
        printw("*Time:$time executing:$cmd $action (group $ENV{SIM_GROUP})\n");
    }

    if ($cmd eq 'exit') {
        return 'exit';

    } elsif ($cmd eq 'reboot') {
        # write into /dev/.../fault_inject
        my $node = genNode($action);    # $action is the node number here
        system("echo reboot > $simdev/$node/sympathy/fault_inject");
        if ($?) {printw("Problem injecting 'reboot' into $simdev/$node/sympathy/fault_inject\n");}

    } elsif ($cmd eq 'traffic') {
        $action *= 10;    # Convert sec to 100ms units
        # genNode() keeps the sink name correct even if $SINKNODE loses
        # its leading zeros.
        my $sink  = genNode($SINKNODE);
        my $shell = "echo 'id=1:sensors=33:period=$action' > $simdev/$sink/dse/query";
        printw("Injecting traffic with command:\n$shell\n");
        system($shell);
        if ($?) {printw("Problem injecting 'traffic' \n");}

    } elsif ($cmd eq 'jitter') {
        my $shell = "$JITTER_SCRIPT";
        printw("Injecting jitter with command:\n$shell\n");
        system($shell);
        if ($?) {printw("Problem injecting 'jitter' \n");}

    } elsif ($cmd eq 'die') {
        # write into /dev/.../fault_inject
        my $node = genNode($action);
        system("echo die > $simdev/$node/sympathy/fault_inject");
        if ($?) {printw("Problem injecting 'die' into $simdev/$node/sympathy/fault_inject\n");}

    } elsif ($cmd eq 'halt') {
        my $node = genNode($action);
        print "GOING TO HALT NODE!\n";
        system("echo halt > $simdev/$node/emrun/command");
        if ($?) {printw("Problem injecting 'halt' into $node\n");}

    } elsif ($cmd eq 'send' || $cmd eq 'recv') {
        # $action is "<node>,<prob>".  Only the node part names the node;
        # previously the whole string was passed to genNode().
        my ($target, $prob) = split(/,/, $action);
        my $node = genNode($target);
        system("echo ${cmd}:node=$node:prob=$prob > $simdev/$node/sympathy/fault_inject");
        if ($?) {printw("Problem injecting '$cmd' into $simdev/$node/sympathy/fault_inject\n");}

    } elsif ($cmd eq 'monitor') {
        # start reading summ_fail
        if ($obj->summ_fail_handle) {
            warn "Already parsing summ_fail! Command ignored\n";
            return '';
        }
        my $sinknode = genNode($SINKNODE);

        log_script();

        my $summary = openAsynchCommand("$EMSTAR_HOME/obj.i686-linux/bin/echocat -w /dev/$sinknode/sympathy/summary");
        $obj->summ_fail_handle($summary);

        #my $log_dir=$obj->log_dir;
        #my $handler2 = openAsynchCommand("tail -f $log_dir/emrun.log | grep SIG");
        #$obj->sigsegv_handle($handler2);

        # echo non-sense into nodes (try to kick them start)
        printw("Checking if all the nodes are up and running...\n");
        foreach my $i (1..$NODES) {
            if ($i == int($SINKNODE) || $NODESKIP[$i]) {next;}
            my $node = genNode($i);
            system("echo WAKE > $simdev/$node/sympathy/fault_inject");
            if ($?) {printw("\tWARNING: Node $i is not ready\n");}
        }

    } else {
        warn "Command ($cmd, $action) not understood\n";
    }

    # Everything except 'exit' reports "keep going".
    return '';
}
427
428
429
430 # ------------------ program initialization --------------------
# Scratch and data directories, plus a sanity check of the environment.
system("mkdir -p /tmp/$ENV{USER}");
die "You need to setup the HOME environment\n" unless $ENV{HOME};
system("mkdir -p $SYMPATHY_DIR") unless -e $SYMPATHY_DIR;

# Fall back to the EMSTAR_HOME environment variable when the built-in
# location does not exist.
unless (-e $EMSTAR_HOME) {
    die "Can't find $EMSTAR_HOME, please set EMSTAR_HOME\n"
        unless $ENV{EMSTAR_HOME};
    die "Can't find $ENV{EMSTAR_HOME}, please re-set EMSTAR_HOME\n"
        unless -e $ENV{EMSTAR_HOME};
    $EMSTAR_HOME = $ENV{EMSTAR_HOME};
}

# The metrics period is taken from the Sympathy header (milliseconds there,
# seconds here) so that it matches the code under test.
my $line = `grep METRICS_PERIOD $EMSTAR_HOME/tos-contrib/sympathy/tos/lib/Sympathy.h`
    || die "Can't open $EMSTAR_HOME/tos-contrib/sympathy/tos/lib/Sympathy.h";

$line =~ /METRICS_PERIOD_MSEC\s+(\d+)/
    or die "Can't find METRICS_PERIOD in Sympathy.h";
$METRICS_PERIOD = $1/1000;

$TRACK_FAIL_PERIOD = $METRICS_PERIOD * $epoch;
print "CHECK metrics-period: $METRICS_PERIOD, track-fail: $TRACK_FAIL_PERIOD\n";
445
446 # process arguments
# Without any argument just print the usage guide and leave.
if (!@ARGV) {
    # Show the real default exit time instead of the literal "<n>*10".
    my $default_exit = $METRICS_PERIOD * 10;
    printw(<<EOL);
Sympathy Automated Regression System (sars) Usage Guide:
> sars.pl [options below]
  -sim_group <num>        Simulation group to use (or set SIM_GROUP in the environment)
  -instances <num>        Number of EmStars to run simultaneously. [default=1]
  -iter <num>             Number of times to run/generate data [default=1]
  -ceiling                Run with ceiling cfg file:\n\t$SIM_CEILING_FILE
  -monitor <sec>          Start reading /dev/*/files after <sec>
  -die <node_num>         After random time, node <node_num> radio dies
  -halt <node_num>        After random time, node <node_num> *HALTS* completely
  -reboot <node_num>      After random time, reboot node <node_num>
  -send node=<n>,prob=<p> After random time, inject a 'send' fault with prob <p> at node <n>
  -recv node=<n>,prob=<p> After random time, inject a 'recv' fault with prob <p> at node <n>
  -jitter <sec1>          After <sec1>, disable jitter on nodes (to force congestion on
                          Sympathy traffic)
  -epoch <num>            #metrics-pd per failure-epoch
  -traffic <sec>:<sec2>   After <sec>, inject dse-traffic w/ period sec2
  -keepgoing              If this is specified, simulation will keep running
                          until time is up. If this is not specified, then
                          simulation will quit 30 seconds after it has
                          successfully detected fault/failure.
  -exit <sec>             After <sec> exit. By default exit after $default_exit seconds
  -verbose                Print time
  -kill                   Kill previous runs
  -bzip2                  Use bzip2 to compress logs
Example: sars.pl -sim_group 95 -jitter 150 -monitor 151 -die 7 -exit 2000 -verbose
Example: sars.pl -sim_group 95 -jitter 150 -monitor 151 -send node=3,prob=70 -recv \\
         node=3,prob=70 -exit 2000 -verbose
EOL
    exit 0;
}
476
477 # parse Arguments
# Turns @ARGV into settings and into @COMMAND_QUEUE, whose entries have
# the form "<sec> <command> [<argument>]".  Faults that are expected to be
# detected are recorded in %NODE.
my @COMMAND_QUEUE;
my $_prevsec = 0;
for (my $i = 0; $i <= $#ARGV; $i++) {
    my $arg = $ARGV[$i];
    if ($arg !~ s/^\-//) {die "Argument $arg not recognized";}

    if ($arg eq 'kill') {
        system("ps auxwww|grep $ENV{USER}|egrep '(emrun|emsim|sympathy_sink|sympathy_app|wmpid)' | awk '{print \$2}'|xargs kill -9");
        printw("All Sympathy processes should be killed\n");
        exit 0;
    } elsif ($arg eq 'bzip2') {
        $BZIP2 = 1;
    } elsif ($arg eq 'sim_group') {
        my $sim_group = $ARGV[++$i];
        if ($sim_group !~ /^\d+$/) {die "SIM_GROUP must be between 0-99";}
        $ENV{SIM_GROUP} = $sim_group;
    } elsif ($arg eq 'iter') {
        $ITERATIONS = $ARGV[++$i];
    } elsif ($arg eq 'instances') {
        $INSTANCES = $ARGV[++$i];
    } elsif ($arg eq 'keepgoing') {
        $QUIT_AFTER_SUCC = 0;
    } elsif ($arg eq 'epoch') {
        $epoch = $ARGV[++$i];
        #Re-calculate the track-fail-period!
        $TRACK_FAIL_PERIOD = $METRICS_PERIOD * $epoch;
    } elsif ($arg eq 'ceiling') {
        $SIM_FILE = $SIM_CEILING_FILE;
        $SIMULATION = 0;
        printw("New sim-file: $SIM_FILE\n");
    } elsif ($arg =~ /^(?:jitter|traffic|monitor|die|halt|recv|send|reboot|exit)$/) {
        # Anchored on purpose: a misspelt option such as -exits is rejected
        # below instead of silently swallowing the next argument.
        my $cmd   = $arg;
        my $value = $ARGV[++$i];
        if (!defined $value) {die "Option -$cmd requires a value\n";}
        my ($sec, $param) = split(':', $value);

        #Randomize time failure is injected!
        if ($cmd =~ /^(?:die|halt|recv|send|reboot)$/) {
            # The injection time is random, so the value is the fault
            # argument itself.  An old-style "<sec>:<argument>" value is
            # accepted too: its time part is ignored and the part after
            # the colon is used.
            if (!(defined $param && length $param)) {$param = $sec;}

            # rand within stats period cuz we check every stats pd for failure
            $sec = int(rand($METRICS_PERIOD));

            #This is done to get rid of fact that sympathy waits for
            # epoch*stats-pd in the beginning. And that the network
            # takes some time to form.
            print "sec was $sec\n";
            #$sec += (3 * $TRACK_FAIL_PERIOD);
            $sec += $INITIAL_DELAY;
            print "sec is $sec\n";
        }

        $_prevsec = $sec;

        if ($cmd eq 'monitor') {
            push(@COMMAND_QUEUE, "$sec $cmd");
        } elsif ($cmd eq 'exit') {
            push(@COMMAND_QUEUE, "$sec $cmd");
            $EXIT_TIME = $sec;
        } elsif ($cmd eq 'traffic') {
            # param is the period
            if ($param !~ /^\d+/) {
                printw("$cmd: Second argument period ($param) must be a number\n");
                exit 1;
            }
            push(@COMMAND_QUEUE, "$sec $cmd $param");
            $LOG_SUFFIX .= "$sec$cmd$param.";
        } elsif ($cmd eq 'jitter') {
            push(@COMMAND_QUEUE, "$sec $cmd");
            $LOG_SUFFIX .= "$sec$cmd.";
        } elsif ($cmd eq 'die' || $cmd eq 'halt') {
            # param is node number
            if ($param !~ /^\d+/) {
                printw("Second argument node ($param) must be a number\n");
                exit 1;
            }
            push(@COMMAND_QUEUE, "$sec $cmd $param");
            $NODE{$param."_SRC_NODE_FAILED"} = 1;
            $NODE{$param} = 1;
            my $tmp = $NODE{$param."_SRC_NODE_FAILED"};
            print "for arg: $param, msg: _SRC_NODE_FAILED: $tmp\n";
            $LOG_SUFFIX .= "$sec$cmd$param.";
        } elsif ($cmd eq 'send' || $cmd eq 'recv') {
            my ($node, $prob);
            if ($param =~ /node=(\d+),prob=(\d+)/i) {
                $node = $1;
                $prob = $2;
                push(@COMMAND_QUEUE, "$sec $cmd $node,$prob");
            } else {
                die "Sorry, send/recv accepts in the format of -send 500:node=5,prob=80";
            }
            $NODE{$node."_SRC_INSUFFICIENT_DATA"} = 1;
            $NODE{$node."_SRC_NODE_FAILED"} = 1;
            # Report the entry that was just stored (keyed by node number).
            my $tmp = $NODE{$node."_SRC_NODE_FAILED"};
            print "for arg: $node, msg: _SRC_NODE_FAILED: $tmp\n";
            $NODE{$node} = 1;
            $LOG_SUFFIX .= "$sec$cmd$param.";
        } elsif ($cmd eq 'reboot') {
            # param is node number
            if ($param !~ /^\d+/) {
                printw("Second argument node ($param) must be a number\n");
                exit 1;
            }
            push(@COMMAND_QUEUE, "$sec $cmd $param");
            # setup the node failure
            $NODE{$param."_SRC_NODE_REBOOTED"} = 1;
            $NODE{$param} = 1;
            $LOG_SUFFIX .= "$sec$cmd$param.";
        }
    } elsif ($arg eq 'verbose') {
        $G{verbose} = 1;
    } else {
        warn "Argument '$arg' not understood.";
        exit 1;
    }
}
593
# The epoch is handed to emsim on its command line and recorded in the
# name of the log directory.
$SIM_FILE   .= " $epoch";
$LOG_SUFFIX .= "epoch$epoch.";

# put in an exit command if the last one isn't already exit
if (!@COMMAND_QUEUE || $COMMAND_QUEUE[-1] !~ /exit/) {
    push(@COMMAND_QUEUE, ($METRICS_PERIOD*10)." exit");
    $EXIT_TIME = ($METRICS_PERIOD*10);
}
if ($EXIT_TIME == 0) {die "Exit time not specified!\n";}

# Group 0 is a legal choice, so test for "set at all" rather than for truth.
if (!defined $ENV{SIM_GROUP} || $ENV{SIM_GROUP} eq '') {
    die "You need to set SIM_GROUP (or specify -sim_group between 0 to 99)";
}


printw(("="x75)."\n");
printw("Command review:\n");
foreach my $cmd (@COMMAND_QUEUE) {
    printw("\t$cmd\n");
}
foreach my $key (sort (keys %NODE)) {
    # "<node>_<fault>" keys name a specific expected fault, bare node
    # numbers only say that something is expected at that node.
    if ($key =~ /^(\d+)_(.*)\z/s) {
        printw("Expecting fault $2 for node $1\n");
    } else {
        printw("Expecting 'something' for node $key\n");
    }
}


# Get a text to ID translation: harvest the `SR?_xxx) return "text"` pairs
# from the decoder source so report texts can be mapped back to their ids.
$MESSAGES{"Unrecognized Fault"} = "UNRECOGNIZED";
my $decode_src = "$EMSTAR_HOME/devel/sympathy/libsympathy/sympathy_decode.c";
open(my $decode_fh, '<', $decode_src)
    or die "Can't find $decode_src";
while (my $src_line = <$decode_fh>) {
    if ($src_line =~ /(SR._\w+)\s*\)\s+return\s+\"([^\"]+)\"/) {
        $MESSAGES{$2} = $1;
    }
}
close($decode_fh);
629
630 # Start running the test here!
# Run the test: for every iteration start one emsim per simulation group,
# replay the command queue against all of them once per second, score the
# summary output, then collect packet statistics and clean up.
my @_CMD = @COMMAND_QUEUE;    # pristine copy; the queue is consumed per iteration
my ($_from, $_to) = ($ENV{SIM_GROUP}, $ENV{SIM_GROUP} + $INSTANCES - 1);
foreach my $iter (1..$ITERATIONS) {
    @COMMAND_QUEUE = @_CMD;
    printw("\n");
    printw("Running iteration $iter\n");

    # execute all the emrun command/instances asynchronously
    foreach my $group ($_from..$_to) {
        $ENV{SIM_GROUP} = $group;
        my $type = $SIMULATION ? 'sim' : 'emu';

        # Pick the first unused log directory among the suffixes a..z;
        # when all are taken the 'z' directory is reused.
        foreach my $count ('a'..'z') {
            $ENV{LOG_DIR} = "$SYMPATHY_DIR/group$ENV{SIM_GROUP}.${LOG_SUFFIX}iter$iter$count.$type";
            if (!-e $ENV{LOG_DIR}) {
                last;
            }
        }
        if (-e $ENV{LOG_DIR}) {
            print "WARNING: Directory $ENV{LOG_DIR} already exists! Over-writing...\n";
        }
        system("mkdir -p $ENV{LOG_DIR}");
        unlink("$ENV{LOG_DIR}/sars.log");

        printw("Log dir: $ENV{LOG_DIR}\n");
        if (!-e "./emrun/emsim") {die "Can't find ./emrun/emsim";}
        my $empid = launch("./emrun/emsim $SIM_FILE", "emrun.log");

        # Watchdog that kills emsim shortly after the scheduled exit time.
        my $time = $EXIT_TIME + 5;
        my $pid2 = `(sleep $time; kill -9 $empid) > /dev/null 2>&1 & wmpid=\$!; echo \$wmpid`;
        chomp($pid2);
        # launch() already recorded $empid in @PID; only the watchdog is new.
        push(@PID, $pid2);

        my $obj = Instance->new();
        $obj->log_dir($ENV{LOG_DIR});
        $obj->sim_group($group);
        $obj->iter($iter);
        push(@INSTANCES, $obj);

        logw($obj, "#h Run-Iter Timepassed TestIter Node-id Component Type Failure Correct");
        logw($obj, "# ($ENV{LOG_DIR}) ".`date`);
    }

    # loop here
    my $starttime = time;
    my $repeat = 1;
    while ($repeat) {
        processFileHandles();
        my $timepassed = time - $starttime;

        if ($G{verbose}) {
            if ($timepassed % 60 == 0) {printf("%d min", int($timepassed/60));}
            $STDOUT_NO_CR = 1;
            print ".";
        }

        # process commands after time has passed
        while ($#COMMAND_QUEUE != -1 &&
               (split(/\s+/, $COMMAND_QUEUE[0]))[0] <= $timepassed) {
            my $cmd = shift(@COMMAND_QUEUE);
            foreach my $obj (@INSTANCES) {
                $ENV{LOG_DIR} = $obj->log_dir;
                $ENV{SIM_GROUP} = $obj->sim_group;
                if (processCommand($obj, $cmd, $timepassed) eq 'exit') {
                    $repeat = 0;
                }
            }
        }

        # parse the lines
        my $quits = $#INSTANCES + 1;    # instances that have not finished yet
        foreach my $obj (@INSTANCES) {
            $ENV{LOG_DIR} = $obj->log_dir;
            $ENV{SIM_GROUP} = $obj->sim_group;
            if (!processSummFail($obj, $timepassed)) {
                printw("FATAL ERROR DETECTED, resetting...\n");
                $repeat = 0;
                last;
            }
            if ($QUIT_AFTER_SUCC) {
                # Once the success level is reached, count the quit timer
                # down by one per pass (one pass per second).
                if ($obj->succ >= $SUCCESS) {
                    $obj->quit_timer($obj->quit_timer - 1);
                    system("touch $ENV{LOG_DIR}/SUCC");
                }
                if ($obj->quit_timer <= 0) {
                    $quits--;
                    # one time user notification
                    if ($obj->quit_timer == 0) {
                        printw("SUCC: group ".$obj->sim_group." successfully detected failure for a while.\n");
                    }
                }
            }
        }
        if ($quits <= 0) {
            printw("SUCC: Every instance has detected failure, next run...\n");
            $repeat = 0;
        }
        sleep 1;
    }

    # get network summary
    foreach my $obj (@INSTANCES) {
        $ENV{SIM_GROUP} = $obj->sim_group;
        foreach my $i (1..$NODES) {
            if ($i == int($SINKNODE) || $NODESKIP[$i]) {next;}
            my $node = genNode($i);
            my @summary = `cat /dev/sim/group$ENV{SIM_GROUP}/$node/sympathy/fault_inject`;

            # First line: the four figures printed with a "B" suffix below.
            # Kept in an array so that the sort globals $a/$b are not shadowed.
            my @bytes;
            if ($summary[0] =~ /Sympathy\D+(\d+)\D+(\d+)\D+(\d+)\D+(\d+)/) {
                @bytes = ($1, $2, $3, $4);
            } else {
                printw("Error, can't get packet statistics from $ENV{SIM_GROUP} $node: '$summary[0]'\n");
            }
            # Second line: the four figures printed with a "#" suffix.
            if ($summary[1] =~ /Sympathy\D+(\d+)\D+(\d+)\D+(\d+)\D+(\d+)/) {
                printw("Packet ($ENV{SIM_GROUP} $node send/recv) Sympathy:$bytes[0]/$bytes[1]B $1/$2# Other:$bytes[2]/$bytes[3]B $3/$4#\n");
            }
        }
    }


    killProcesses();

    if ($BZIP2) {
        system("bzip2 $SYMPATHY_DIR/group*/log? $SYMPATHY_DIR/group*/log?? $SYMPATHY_DIR/group*/emrun.log");
    }
    #if ($iter!=$ITERATIONS) {
    # killProcesses() sends the final kill -9 after 10 seconds; wait it out.
    printw("Waiting for processes to really end...");
    sleep 12;
    printw("done\n");
    #}
}
printw("Sars all done, exiting.\n");
exit 0;
# This page was automatically generated by the
# LXR engine.
# Visit the LXR main site for more
# information.