#!/usr/bin/perl -w

# This process takes over the job of qsserver and saves/deletes
# PUM output.
#
# Use: pum_server.pl [KEEPDUMP=n] [REPLACE=0/1] [RUNID]
#
# Actions:
#   1. Converts files specified in "TOCONV" (usually means (pm, ps, py) and
#      some dump means (ds, dy)) to pp-format, and copies to archive filespace
#      the subset in TOARCH. We are told about those in TOMAIL. Additionally,
#      some files (in TOSS) are stashsplit (using pp2ss.pl) with a selection
#      of stashcodes that (can) depend on the filetype.
#   2. Saves start-of-year dumps (RUNID?.da??110) to $ARCHIVE_DUMP
#   3. Deletes all but last $KEEPDUMP (default 31) instantaneous dumps (da)
#      nb: 31 is tuned for if you do daily dumps and monthly means - you look
#      silly with =30 if writing the 1st of the month fails due to space...
#
# Upgrade:
#   Save pp files (made with safe_convpp.pl) instead of the fieldsfiles.
#   These are smaller, and they are what we want to use anyway
#   Include the -r flag to safe_convpp.pl - ensures that repeat runs overwrite old ones
#               -T flag - preserves creation date
#   Guess -s from _32 in whoami
#   KEEPDUMP=n on command line
#   REPLACE=1 now default
#   Guess RUNID from directory, or directory from RUNID... 
#   ***note*** if RUNID is guessed, then the process doesn't have RUNID in it
#              as far as "ps" is concerned, which makes REPLACE not work...
#

# Setup
use FileHandle;
# Who we are.  The user name comes from whoami (glob is a bit weird, so it
# is avoided) and is used to build every path below.
$me=`whoami`;
chomp($me);

# Model version and the directories derived from it / from the user name:
#   DATAW_BASE   - parent of the per-run dataw.RUNID directories
#   ARCHIVE      - where archived (pp) files are copied to
#   ARCHIVE_DUMP - where start-of-year dumps are moved to
$VN="vn4.5";
$ARCHIVE="/home/$me/archive/";
$ARCHIVE_DUMP="/home/$me/archive_dump/";
$DATAW_BASE="/home/$me/PUM_Output/$VN/";

# File types to convert to pp (eg, xaaana.pmj4feb) ...
@TOCONV=("pm","ps","py","px","pa","pb");
# ... the subset of those that is also copied to the archive ...
@TOARCH=("pm","pa","pb","py","px","ps");
# ... and the types for which a mail message is sent upon saving.
@TOMAIL=("ps","py","px");

# Files to stashsplit (only types in the TOCONV list are allowed): %TOSS is
# the lookup table, @TOSS its list of keys.
%TOSS=("ps" => "1");
@TOSS=keys(%TOSS);
# Default stashcode list, and the (initially empty) table of extras
$DefaultSCList="24,16222,146";	# SfcT,MSLP,IceC
my %ATOSS;
# $ATOSS{"ps"}="4";		# Also ask for theta for seasonals; make lists by 4,16222, etc

# Per-type period value; used later as a sub-directory name for stashsplit
# output.
%PERIOD=("pm" => "0.01", "ps" => "0.03", "py" => "1");

# Extra options to safe_convpp.pl: -T (touch) -r (repeat runs overwrite).
# -s (convert to 32-bit) is appended when we appear to be running at 64-bit,
# guessed from the user name NOT containing "32".  This could instead be done
# per-job by adding "$SC_OPTS .= ' -s '" in the $RUNID-server-config file.
$SC_OPTS=" -T -r ";
$SC_OPTS.=" -s " unless $me =~ /32/;

# Command-line options?
# Ones I have in mind are "TEST" - don't actually move anything
#
# Defaults are set first; then every leading argument of the form NAME=value
# is consumed and assigns "value" to the global variable $NAME (for example
# KEEPDUMP=10 or REPLACE=0).  Parsing stops at the first argument that is not
# of that form, which is left in @ARGV (normally the RUNID).
$SET_DIR_TO_RUNID=1;		# Allows us to set directory to /home/$me/PUM_Output/vn4.5/dataw.$RUNID
$KEEPDUMP=31;			# Dumps to keep
$NOSAVE=0;
$TEST=0;			# Don't do anything
$NORUNCHECK=1;			# Don't check the job is running
$REPLACE=1;			# If set, and a server is running for this job already, replace it [make default]
$CLEANUP=0;			# If set, kill any running server and don't start a new one. Process all remaining files
while (@ARGV and $ARGV[0] =~ /^(\w+)=(.*)/) {	# @ARGV test avoids matching against undef
  my ($name,$value)=($1,$2);
  shift(@ARGV);
  # Assign to the package global by name; \w+ above restricts what can be named
  no strict 'refs';
  ${"main::$name"}=$value;
}
# Prefix for log messages so test-mode entries are recognisable
$TT=$TEST ? "[test mode] " : "";

# Get runid and make archive directories (if not there already)
# As a bonus, if invoked in the DATAW directory, we don't need to set RUNID,
# and if invoked with RUNID we chdir to the DATAW directory (OK, OK, its really
# the DATAM directory, but I always run from DATAW...)
#
# RUNID is the first remaining command-line argument and must be 5 characters
# long.  Otherwise it is taken from the part of the current directory name
# after the last "." (eg .../dataw.xaaan), provided that is exactly 5
# characters and contains no "/"; failing that the script dies.
$RUNID=shift or $RUNID="";
if (length($RUNID) != 5) {
# Try to set from directory name
  chomp($pwd = `pwd`);
  if ($pwd =~ /\.([^.\/]{5})$/) {
    $RUNID=$1;
    warn "Setting RUNID to $RUNID based on pwd"
  } else {
    die "The first parameter is the RUNID, eg xaaan (and failed to set from pwd)"
  };
} elsif ($SET_DIR_TO_RUNID) {
# If RUNID *is* 5, then we presume its valid, and try to set directory accordingly
# unless advised otherwise.  A failed chdir is reported but is not fatal, so
# running from a non-standard data directory keeps working as before.
  print "chdir to $DATAW_BASE/dataw.$RUNID\n";
  chdir "$DATAW_BASE/dataw.$RUNID"
    or warn "Can't chdir to $DATAW_BASE/dataw.$RUNID ($!) - staying in the current directory\n";
};
# Both archive directories must exist and be writable.  The dump archive is
# checked too: files are later moved into it with "mv", and moving onto a
# path that is not a directory would rename dumps on top of each other.
for my $dir ("$ARCHIVE/$RUNID","$ARCHIVE_DUMP/$RUNID") {
  system("mkdir","-p",$dir) unless -d $dir;
  die "Can't write archive directory $dir\n" unless -d $dir and -w $dir;
};

# Check we're not already running on this job
#
# The process table is searched for pum_server.pl processes owned by $me that
# mention $RUNID.  This process normally appears in that list itself, hence
# the "more than one" test.  (Per the header notes: if RUNID was guessed from
# the directory it is not on our command line, so detection does not work.)
# With REPLACE or CLEANUP set, every other matching server is sent SIGTERM;
# otherwise we refuse to start.
my $repl_text="";
$txt=`ps -efl | grep " $me " | grep pum_server.pl | grep $RUNID | grep -v grep`;
my @servers=split(/\n/,$txt);
if (@servers > 1) {
# If we are running, perhaps we want to replace the one that is running?
  if ($REPLACE or $CLEANUP) {
    my $self=" $$ ";
    my @old_pids;
    for my $line (@servers) {
      next if index($line,$self) >= 0;		# skip ourselves (and our own children)
      # In "ps -efl" output the pid is the number following the user name
      push(@old_pids,$1) if $line =~ /\Q$me\E\s+(\d{1,9})\s+/;
    }
    if (@old_pids) {
      $pid=join(", ",@old_pids);
      print "OK: I'm going to kill your old server, pid $pid\n";
      my $signalled=kill('TERM',@old_pids);
      warn "Could only signal $signalled of ".scalar(@old_pids)." old server(s)\n"
        if $signalled < @old_pids;
      $repl_text="[I replaced server with PID $pid]\n";
    } else {
      # Never run a bare "kill" with no pid
      print "Another server seems to be running on $RUNID but I can't find its pid - not killing anything\n";
    }
  } else {
    die "Sorry: a server is already running on this job - specify REPLACE=1 to replace it\n"
  };
};

# Output file
# All progress messages are appended to $RUNID.server.out in the current
# directory; the handle is unbuffered so the log can be followed live.
open OUT,">> $RUNID.server.out" or die "Can't open server output file $RUNID.server.out: $!";
autoflush OUT 1;
print OUT "\n--- ".$TT."Starting server ($0) at: ",`date`;
print OUT $repl_text;
# Eval any job-specific stuff
# NOTE: every line of $RUNID-server-config is executed as Perl code, so the
# file must be trusted.  Each line is logged before it is evaluated, and a
# line that fails to compile or run is reported in the log rather than
# silently ignored.
my $config="$RUNID-server-config";
if (-r $config) {
  if (open(my $cfg,"<",$config)) {
    while (<$cfg>) {
      print OUT "From $config: $_";
      eval $_;
      if ($@) {
        chomp(my $err=$@);
        print OUT "\n  ** error evaluating the line above: $err\n";
      }
    };
    close($cfg)
  } else {
    print OUT "Can't open $config: $!\n"
  }
};
# Computed after the config file so that a KEEPDUMP set there is honoured
$KEEPDUMP1=$KEEPDUMP+1;
# Report the stashsplit types from %TOSS itself (that is what the main loop
# consults), so changes made by the config file show up here.
print OUT "SC_OPTS: $SC_OPTS, Keep: $KEEPDUMP, Conv: ",join(",",@TOCONV)," Arch (to $ARCHIVE):",join(",",@TOARCH)," Mail:",join(",",@TOMAIL)," SS:",join(",",sort(keys(%TOSS))),"\n";
if ($CLEANUP) { print OUT "Cleaning up then exiting\n" };

# Loop while model is running
#
# The loop runs unconditionally when $NORUNCHECK > 0 (the default) or
# $TEST > 1; otherwise it continues for as long as "ps" shows some process,
# other than this server, that mentions $RUNID.  "grep -v grep" keeps the
# check from matching its own grep process (which also has $RUNID on its
# command line).  The cheap flag tests come first so that ps is only run when
# it is actually needed.
#
# Each pass: (1) convert/archive/stashsplit output files, (2) move
# start-of-year dumps to the dump archive, (3) delete all but the newest
# $KEEPDUMP dumps, then sleep for ten minutes (or exit if $CLEANUP).
while ($NORUNCHECK > 0 or $TEST > 1 or `ps -efl | grep -v pum_server.pl | grep -v grep | grep "$RUNID"`) {

  print OUT ".";

  chomp($DATE=`date`);

  #
  # 1.
  #
  # Look for means to move (list by time and skip the most recent
  # so as to not move one thats being written to) (unless $CLEANUP)
  #
  if ($TEST) { print "Section 1\n" };
  if (!$NOSAVE) {
    for my $type (@TOCONV) {
      # If in test mode...
      if ($TEST) { print "Looked for files like $RUNID?.$type?????\n" };
      # Newest first.  We want all files if we're cleaning up, and
      # all-but-newest otherwise; dropping the first entry here replaces
      # the non-portable "tail +2".
      my @files=split("\n",`ls -t $RUNID?.$type????? 2> /dev/null`);
      shift(@files) unless $CLEANUP;

      # Per-type settings (the same for every file of this type)
      my $to_arch=grep(/$type/,@TOARCH);
      my $to_mail=grep(/$type/,@TOMAIL);
      my $per=defined($PERIOD{$type}) ? $PERIOD{$type} : "";
      # Stashcodes: extras for this file type (the period key is still
      # accepted, since that is what used to be looked up) followed by the
      # defaults, as one comma-separated list with no blanks or empty items.
      my $extra=defined($ATOSS{$type}) ? $ATOSS{$type} : $ATOSS{$per};
      my $scs=join(",",grep { defined($_) and length($_) } ($extra,$DefaultSCList));
      $scs=~s/\s+//g;
      $scs=~s/,+/,/g;
      $scs=~s/^,//;
      $scs=~s/,$//;
      my $ssdir="/home/$me/pp_fields/$RUNID/$per";

      for my $file (@files) {
        if ($TEST) { print "Found $file\n" };
        my ($opt_o,$ppfile,$action);
        if ($to_arch) {
          # safe_convpp.pl is given -o with the archive directory
          $opt_o="-o $ARCHIVE/$RUNID";
          $ppfile="$ARCHIVE/$RUNID/$file";
          $action="Archiving $ppfile";
        } else {
          $opt_o="";
          $ppfile=$file;
          $action="Converting";
        };
        # Message into archiving file
        print OUT "\n".$TT."$action $file at $DATE";
        # Mail me if its one we care about
        if ($to_mail and !$TEST) {
          `echo $action as pp $file at $DATE | Mail -s "From $RUNID" $me`
        };
        if ($TEST) {
          print "Test: would have done: safe_convpp.pl $SC_OPTS $opt_o $file\n";
          print "and                  : pp2ss.pl SC=$scs OUTBASE=$ssdir/ ${ppfile}.pp\n" if $TOSS{$type};
          next;
        };
        # Archive/convert it (as a pp-field), logging whatever the tool prints
        print OUT "\n".`safe_convpp.pl $SC_OPTS $opt_o $file`;
        if ($TOSS{$type}) {
          if (!-d $ssdir) { print "will mkdir -p $ssdir for stashsplit fields\n"; `mkdir -p $ssdir` };
          print OUT "\nSS'ing ${ppfile}.pp into $ssdir/\n";
          print OUT `pp2ss.pl SC=$scs OUTBASE="$ssdir/" ${ppfile}.pp`
        };
      };
    };
  };

  #
  # 2.
  #
  # Look for start-of-year dumps to save
  #
  if ($TEST) { print "Section 2\n" };
  for my $dump (split("\n",`ls -t $RUNID?.da??110 2> /dev/null`)) {
    print OUT "\n".$TT."Archiving to $ARCHIVE_DUMP/$RUNID: $dump at $DATE";
    next if $TEST;
    # The trailing "/" makes mv fail, rather than rename the dump onto a
    # plain file, should the archive directory be missing.
    system("mv",$dump,"$ARCHIVE_DUMP/$RUNID/") == 0
      or print OUT "\n".$TT."FAILED to archive $dump";
  };

  #
  # 3.
  #
  # Look for dumps to remove, keeping the last $KEEPDUMP
  #
  if ($TEST) { print "Section 3\n" };
  my @dumps=split("\n",`ls -t $RUNID?.da????? 2> /dev/null`);
  # Listing is newest first: drop the first $KEEPDUMP names from the list so
  # that only the older dumps remain as candidates for deletion.
  my $keep=$KEEPDUMP > 0 ? int($KEEPDUMP) : 0;
  splice(@dumps,0,$keep);
  for my $dump (@dumps) {
    print OUT "\n".$TT."Removing: $dump at $DATE" or warn "?";
    next if $TEST;
    unlink($dump) or print OUT "\n".$TT."FAILED to remove $dump: $!";
  };

  # Now sleep for a while before going round again...
  if ($CLEANUP) { print OUT "Cleanup finished; dying\n"; exit };

  sleep 10*60;

};

# Refresh the timestamp: the one taken inside the loop predates the last sleep
chomp($DATE=`date`);
print OUT "\n--- server terminated at $DATE since model has stopped running\n";
