#!/bin/perl

# TTxx and IIxx decoding.
# (c) WMConnolley (http://www.nerc-bas.ac.uk/public/icd/wmc/) April 1996.
#
# Disclaimer: this routine is not guaranteed to work. It does not decode the
#	full message. It works with perl 4. Program may be freely used
#	for non-commercial purposes but not for commercial use. No
#	redistribution without this header.
#	Please mail me (wmc@bas.ac.uk) with bugs and flaws (but not with
#	ideas for extras).
#
# Bugfixes:
#   29/08/96: wmc: allow possibility of missing wind reports (I_d figure in code
#                  tables) in AA and CC parts. 
#   08/10/96: wmc: correct formula for guessing thousands figure of the height
#                  below 500 hPa.
#   10/10/96: wmc: add error checking on wind direction and recognize no-winds
#                  at tropopause level
#   21/02/97: wmc: add code to swap tokens 1,2 for IIxx (mobiles) since, bizarrely,
#                  they have their ID in a different order from TTxx
#   25/04/97: wmc: add verify option
#   25/05/98: wmc: add option to set sfc_h at command line (as sfc_h=hhh)
#                  make sure that h is monotonic
#   18/06/98: wmc: Hmm, what did I do here?
#   30/09/99: wmc: Re-instate printing of the 1000 hPa level
#                  Fix interpretation of h of, eg, 561 at 1000 hPa: this should be
#                  h=500-561=-61 (yes really!). This affected other levels since the
#                  force-h-to-be-montonic bumped other levels up too!
#   08/11/99: wmc: change test to /(TT|II)(\w)\2/ from /(TT|II)(.)\2/ to handle
#                  rare circumstance when TTAA (etc) is corrupted as TT<strange-things>
#                  [happens in 1982/02/24 23Z at 89055]
#   03/12/99: wmc: fix (?) hydrostatic checks: use
#                    $thou=int((($sfcp-$pres)+sfc_h1/10-$h/10)/100+0.5)*1000; 
#                  not
#                    $thou=int((($sfcp-$pres)+sfc_h1/10-$h/10)/100)*1000;
#                  which is equivalent to using "nint" rather than "int"
#
# Use: temps.pl list-of-file-names 
# Example: temp.pl /data/met/simpson/oper/data/hour*.raw
#  (then use stick1.pl to stick sections A, B, C and D together and format them)
# Input:
#  1. List of file names to read through OR,
#  2. Read data from stdin OR,
#  3. If "lines" of input begin with a ftp or http URL, pull this in using
#     url_get (a seperate package).
# Output: data to stat.day.hour.section.temp
#		stat=station identifier (89002, etc)
#      		section=AA, BB, CC or DD 
#	 	day=dd (ie 01, 02, ...31)
#        	hour=hh (ie 01, 02, ...23)
#	  OR (if output=tty is set)
#	  data to stdout
#  Note: some perverts code the day, hour as // for a NIL= report. These
#   will be opened to /dev/null
#  Some error output to the terminal
#  Some (fairly junky) output to files
#    temp.decoding.general and
#    temp.decoding.junk.
# Input options can be set on the command line (before any file names) as
#   var=opt, for example "temps.pl output=tty input.raw | stick1.pl"
#   sensible ones are:
#     knot=1			- wind output in knots not m/s
#     output=tty		- write output to tty not files
#     lookinfirst=number	- look in first "number" characters of
#                                 a message to see if it contains "TTxx".
#                                 Defaults to 50.
#     dieonerrors=1             - die if evaluating a message produces a
#                                 fatal error in "eval". But generally, we'd
#                                 prefer to keep going.
#     N=number                  - start message counting at "number" not 1.
#                                 Fairly useless.
#     nofileoutput=1		- Never write any files (sets output=tty)
#     warn=0			- Turn off a few warning messages (sensible
#				  if output is piped direct to "stick1.pl")
#     verify=0			- Turn off input verification (unsafe)
#     sfc_text="text"		- Alter default ("Sfc: ") prefix
#
# Restrictions:
#  Limited error checking is done
#  Program accuracy not checked very much yet!
#  Guessing tens-of-thousands figure for geopotential height is done 
#    when the temp sections are stuck together by "stick1.pl". Guessing
#    the thousands figure is done here and is usually correct.
#  Files names used mean this will (probably) only run under UNIX.
#
# Notes:
#  IIxx decoding is done by stripping the II-type header then using the
#  corresponding TTxx subroutine
#  This version preceedes each line with stationid etc so that stick1 can
#  put together multiple messages in piped output
#  I'm not totally sure about the knots to m/s conversion...
#  The "no data value" is -999
#  Below 500 hPa, the thousands figure on the geopotential height has to 
#  be guessed at.
#

#
# Begin code
#

# Set "end-of-section" to = (usually \n)
$/="=";
# Count messages. Start with none... (unless reset from command line)
$N=0;
# Set number of chars to seek "TTxx" in. Usually 50. If set too large,
#   may get confused in text messages. Can be reset from command line.
$lookinfirst=50;
# Warning messages initially turned on
$warn=1;
# Verification of safety of input (no "'"'s, etc) turned on
$verify=1;
# Text to prefix the surface obs with
$sfc_text="Sfc: ";
# Set sfc hieght to unknown (is set only by IIAA) but allow this to be over-ridden at command line
$sfc_h=-999;

# Set command-line switches (see PERL book p256!), can override any of the
# above.
eval "\$$1=\$2" while $ARGV[0] =~ /^(\w+)=(.*)/ && shift;

# Open some output files for various messages. These may not be wanted
# in which case set "nofileoutput"
if ($nofileoutput) {
  $generalfile="/dev/null";
  $junkfile="/dev/null";
  $filesfile="/dev/null";
  $output=1; }
else {
  $generalfile="temp.decoding.general";
  $junkfile="temp.decoding.junk";
  $filesfile="temp.decoding.files"; 
}
open(GENERAL,"> ".$generalfile) || die "Can't open general output file";
print GENERAL `date`,"\n";
open(JUNK,"> ".$junkfile) || die "Can't open junk output file!";
open(FILES,"> ".$filesfile) || die "can't open list of files written";
#
# Main loop through the input.
#
while (<>) { 
  if (m+^\s*(ftp|http)://+i) {
    s/=$//; s/\n$//;
#    print "\% trying url_get on $_";
# If we have a URL, replace the current line with its contents
    $_=`./url_get $_` || die "%url_get failed";
#    print " read in ".s/\n/\n/g." lines.\n";
  };
# Check for bad children. Note stick.pl recognises lines beginning with % as
# error reports.
  0=~/(0)/;
  s/([^\/\w%&+?=: \n\cM])//;
  if ($1 and $verify) {print "%It really is not polite to give me input ($1) like that."; exit};
# If we have url-got, we may have multiple reports
  @LINES=split($/,$_);
  while ($LINE=shift @LINES) {&decode($LINE)}
}

sub decode {
  ($_)=@_;

# Ignore all non-XXxx "lines" (XX=TT or II)
# To try to prevent confusion with "text" messages, insist the XXxx comes in
#   first [50] chars
#
  if (substr($_,0,$lookinfirst) =~ /(TT|II)(\w)\2/ ) {
# Save section (AA, etc)
  $SECT=$2.$2;
# Save type (usually TT, sometimes II)
  $TYPE=$1;
  $N++;
  @tokens=split;
  @tokens[$#tokens] !~ s/=//;
  print GENERAL "\nMessage number: $N\n";
# Throw away header tokens up to "XXxx"
  print JUNK "Throwing away: ";
  while ( $tokens[0] !~ /$TYPE$SECT/) {print JUNK shift(@tokens)," ";};
# We've found section "$SECT"; now go to appropriate subroutine
  print JUNK "\nFound type $TYPE and section $SECT with $#tokens groups\n";
  eval '&section_'.$TYPE.$SECT.'(@tokens)';
# Print any error messages
  if ($warn) {print $@};
  if ($dieonerrors) {die if $@};
} }

#
# Subroutine to get date, wind indicator and levels and station from XXxx
# Also, open station output file, $stat.temp
#
sub get_dws {
# Get date
  $_=shift(@_);
  &check5($_);
  $day=substr($_,0,2); $hour=substr($_,2,2);
# Wind levs is an indicator of whether wind levels are included (I_d)
  $wind_levs=substr($_,4,1);
  print GENERAL "Wind indicator is $wind_levs (";
  if ($wind_levs eq "/") {$wind_levs=99};
  print GENERAL "$wind_levs)\n";
  if ($day > 50) {$day=$day-50; $knots=1} else {$knots=0};
# Get station id
  $stat=shift(@_);
# Open output file (first deleting old one if it exists) and checking for junk
# names
  if (($day, $hour) =~ s+/++) {
    $file="/dev/null"} 
  else {
    $file="$stat.$day.$hour.$SECT.temp";
  }
  unlink $file; 
  if ($output) {open(OUT,">-")} 
  else {open(OUT,"> $file") || die "failed to open $file"};
  print FILES "$file\n";
  if ($knots) {print FILES "(Wind speed in knots has been converted to m/s)\n";}

# Return value
  ($day, $hour, $knots, $stat);
}

#
# Subroutine to get temp and dew depression from TTTDD groups
#
sub get_ttd {
  $_=@_[0];
  &check5($_);
  $TEMP=substr($_,0,3);
  if ($TEMP eq "///") {$TEMP=-999} else {if ($TEMP % 2 ==0) {$TEMP=$TEMP/10.} else {$TEMP=-$TEMP/10.}};
  $DEW=substr($_,3,2);
  if ($DEW eq "//") {$DEW=-999} else {
    if ($DEW < 50) {$DEW=$DEW/10.} else {$DEW=$DEW-50};
  }

# Return
  ($TEMP, $DEW);
}

#
# Subroutine to check a group is 5 characters
#
sub check5 {
  if (length(@_[0]) != 5 && $warn) {print GENERAL "St: $stat: Group of wrong length: @_[0]\n"}
  if (length(@_[0]) != 5) { return 0} else {return 1};
}

#
# Subroutine to handle DDFFF group
# Note naughty use of global "knots"!
# Note: knots is true if this message uses knots to report winds
#       knot is true if we want output in knots rather than m/s
#
sub get_ds {
  $_=@_[0];
  if (!&check5($_)) { return (-999,-999) };
  $DIR=substr($_,0,2);
  $SPD=substr($_,2,3);
  if ($DIR eq "//") {$DIR=-999} else {
    if ($SPD > 499) {$SPD=$SPD-500; $DIR=$DIR+.5};
    $DIR=$DIR*10;
  };
  if ($SPD eq "///") {$SPD=-999} else {
    if ($knots && !$knot) {$SPD=$SPD*0.5148}
    elsif (!$knots && $knot) {$SPD=$SPD/0.5148};
  };

# Return value
  ($DIR, $SPD);
}

#
# Subroutine to handle TTAA
#
sub section_TTAA {
  @tokens=@_;
# Throw away section identifier
  shift(@tokens);
# Get Date etc
  ($day, $hour, $knots, $stat) = &get_dws((shift(@tokens),shift(@tokens)));
  print GENERAL "D, H, S: $day, $hour, $stat\n";
# Handle NIL returns
  if (@tokens[0] eq "NIL") {return};
# Handle 99PPP
  &check5($tokens[0]);
  $sfcp=substr(shift(@tokens),2,3); if ($sfcp < 200) {$sfcp+=1000};
# Get T and Td
  ($temp, $dew)=&get_ttd(shift(@tokens));
  ($dir, $speed)=&get_ds(shift(@tokens));
# Write sfc (note: sfc_h is set for IIAA else is -999 for TTAA)
  print OUT "$sfc_text$stat.$day.$hour $sfcp $sfc_h $temp $dew $dir $speed\n";

# Loop through looking for "PPhhh" (which are followed by TTTdd ddfff).
# Use associative array "levs", while loop will fail when lookup fails.
  %levs=('00', 1000,   92, 925,   85, 850,   70, 700,   50, 500,   
           40,  400,   30, 300,   25, 250,   20, 200,   15, 150,   10, 100);

# sfc_h is a value, or -999 for no value
# For use in the psuedo-hydrostatic guess at the thousands figure in the
# height, we need sfc_h1 which is =sfc_h if we know it, else =0.
# If sfc_h is unknown but is really largeish (500m+) there is a good chance
# of getting the thousands figures wrong!
  if ($sfc_h != -999) { $last_h=$sfc_h; $sfc_h1=$sfc_h } else { $last_h=0; $sfc_h1=0 };
  while ($pres=$levs{substr($NEXT=shift(@tokens),0,2)}) {
    $h=substr($NEXT,2,3);
    if ($h=~/\//) { 
      $h=-999.0 
    } else {
# If at 1000 hPa and h>500, then the real value is (500-h) (bizarre but true, folks!)
      if ($pres == 1000 and $h>500) {
        $h=500-$h
# Below 500, guess thousands figure. Above, *10.
      } elsif ($pres <= 500) {
        $h=$h*10
      } else {
        $thou=int((($sfcp-$pres)+sfc_h1/10-$h/10)/100+0.5)*1000; $h=$h+$thou;
        if ($h < $last_h) { $h+=1000 };
      };
      $last_h=$h;
    };
    ($temp, $dew)=&get_ttd($NEXT=shift(@tokens));
# Get wind dir and speed, if wind_levs says we should
    if ($wind_levs*100<=$pres) {
      ($dir, $speed)=&get_ds(shift(@tokens))
    } else {
# A relatively common error is to miscode two groups of solidi for
# the 1000 hPa level even if "no winds" has been specified
      if ($pres == 1000 && $NEXT eq "/////") {
        if (($NEXT=shift @tokens) ne "/////") {unshift(@tokens,$NEXT)}
      };
      $dir=-999; $speed=-999;
    };
    #if ($pres <= $sfcp) 
    {print OUT "$stat.$day.$hour $pres $h $temp $dew $dir $speed\n"};
  }
# Expect 88xxx group - pressure at tropopause
  if ($NEXT !~ /^88(.{3})$/) {print JUNK "No 88xxx group\n";} else {
    if ($1 eq "999") {print JUNK "88 group is 88999\n"} else {
      $pres=$1;
      ($temp, $dew)=&get_ttd(shift(@tokens));
      if ($wind_levs*100<=$pres) {
        ($dir, $speed)=&get_ds(shift(@tokens))
      } else {
        $dir=-999; $speed=-999;
      };
      print GENERAL "Tropopause group: $pres, -999, $temp, $dew, $dir, $speed\n";
      print OUT "$stat.$day.$hour $pres -999 $temp $dew $dir $speed\n";
    }
  }
# Ignore any 77 or 66 groups
  print GENERAL "$TYPE"."AA: Throwing away: @tokens\n";
}

#
# Subroutine to handle TTBB
#
sub section_TTBB {
  @tokens=@_;
# Throw away section identifier
  shift(@tokens);
# Get date etc
  ($day, $hour, $knots, $stat) = &get_dws((shift(@tokens),shift(@tokens)));
  print GENERAL "D, H, S: $day, $hour, $stat\n";
#
# Loop through looking for "nnppp". Record "ppp" in $2. These are T, Td groups.
#
  while (($NEXT=shift(@tokens)) =~ /^(.)\1(...)/) {
# Extract pressure
    $PRES=$2;
    if ($PRES < 100) {$PRES=$PRES+1000};
# Next group: Temp and Dew
    ($TEMP, $DEW)=&get_ttd(shift(@tokens));
# Output
    print OUT "$stat.$day.$hour $PRES -999 $TEMP $DEW -999 -999\n";
# Next 2 groups...
  }
#
# Now look for 21212, the wind groups.
  if ($NEXT eq "21212") { while (($NEXT=shift(@tokens)) =~ /^(.)\1(...)/) {
    $PRES=$2;
    if ($PRES < 100) {$PRES=$PRES+1000};
# Next group: wind dir and speed
    ($DIR,$SPD)=&get_ds(shift(@tokens));
    print OUT "$stat.$day.$hour $PRES -999 -999 -999 $DIR $SPD\n";
  } } else { print GENERAL "No 21212 groups\n"};
#
# Now throw away regional/national groups (51515 etc)
#
  print GENERAL "$TYPE"."BB: Throwing away: $NEXT @tokens\n";  
}

#
# Subroutine to handle TTCC
#
sub section_TTCC {
  @tokens=@_;
# Throw away section identifier
  shift(@tokens);
# Get Date etc
  ($day, $hour, $knots, $stat) = &get_dws((shift(@tokens),shift(@tokens)));
  print GENERAL "D, H, S: $day, $hour, $stat\n";
# Loop through looking for "PPhhh" (which are followed by TTTdd ddfff).
# Use associative array "levs", while loop will fail when lookup fails.
  %levs=(70, 70,   50, 50,  30, 30,  20, 20,   10, 10);
  while ($pres=$levs{substr($NEXT=shift(@tokens),0,2)}) {
    $h=substr($NEXT,2,3)*10;
    ($temp, $dew)=&get_ttd(shift(@tokens));
# Get wind dir and speed, if wind_levs says we should
    if ($wind_levs*10<=$pres) {
      ($dir, $speed)=&get_ds(shift(@tokens))
    } else {
      $dir=-999; $speed=-999;
    };
    print OUT "$stat.$day.$hour $pres $h $temp $dew $dir $speed\n";
  }
  print GENERAL "$TYPE"."CC: Throwing away: @tokens";
}

sub section_TTDD {
  @tokens=@_;
# Throw away section identifier
  shift(@tokens);
# Get date etc
  ($day, $hour, $knots, $stat) = &get_dws((shift(@tokens),shift(@tokens)));
  print GENERAL "D, H, S: $day, $hour, $stat\n";
#
# Loop through looking for "nnppp". Record "ppp" in $2. These are T, Td groups.
#
  while (($NEXT=shift(@tokens)) =~ /^(.)\1(...)/) {
# Extract pressure
    $PRES=$2/10.;
# Next group: Temp and Dew
    ($TEMP, $DEW)=&get_ttd(shift(@tokens));
# Output
    print OUT "$stat.$day.$hour $PRES -999 $TEMP $DEW -999 -999\n";
# Next 2 groups...
  }
#
# Now look for 21212, the wind groups.
  if ($NEXT eq "21212") { while (($NEXT=shift(@tokens)) =~ /^(.)\1(...)/) {
    $PRES=$2/10.;
# Next group: wind dir and speed
    ($DIR,$SPD)=&get_ds(shift(@tokens));
    print OUT "$stat.$day.$hour $PRES -999 -999 -999 $DIR $SPD\n";
  } } else { print GENERAL "No 21212 groups\n"};
#
# Now throw away regional/national groups (51515 etc)
#
  print GENERAL "$TYPE"."DD: Throwing away: $NEXT @tokens\n";

}

#
# Sub to handle IIAA.
# Basically, jigger the message so it looks like a TTAA...
# Note: global sfc_h is set.
#
sub section_IIAA {

@tokens[1,2]=@tokens[2,1];
($lat,$lon,$mmm,$sfc_h)=splice(@tokens,3,4);
($lat,$lon)=&marsden2lalo($lat, $lon, $mmm);
$h_ind=substr($height,4,1);
$sfc_h=substr($sfc_h,0,4);
# Convert feet->meters if needed
if ($h_ind > 5) {$sfc_h=$sfc_h/3.28084};
eval '&section_TT'.$SECT.'(@tokens)';

}

#
# BB, CC, DD require the same strip-off-the-header approach - so let
# the IIAA code handle it. Isn't this beautifully flexible?
#
sub section_IIBB { &section_IIAA(@_); }
sub section_IICC { &section_IIAA(@_); }
sub section_IIDD { &section_IIAA(@_); }

#
# Given the lat, lon and marsden square groups of FM-38, return the lat,
# and longitude. Note: the longitude bit is not well tested!
#
sub marsden2lalo {
  ($la,$lo,$m)=@_;
  $marsden=substr($m,0,3);
  $lat=substr($la,2,3)/10.;
  $lon=substr($lo,1,4)/10.;
# 315 < Marsden square > 624 implies SH
  if ($marsden > 315 && $marsden < 624) {$lat=-$lat};
# Longitude is harder...
  $lo_i=1;
  if ($lo < 289) {if (($lo % 36) < 18) {$lo=-1}}
  else {if ($lo < 624) {if ((($lo-299) % 36) < 18) {$lo_i=-1}}
  else {if ((($lo-900) % 36) < 18) {$lo_i=-1}}};
  $lon=$lon*$lo_i;

  return ($lat,$lon);
} 
