#!/usr/bin/perl 
#-w
#
# KAL -- Extract metadata using original netcdf files, and write it out
#        as XML tags through the writer object supplied by the caller
#
#
# Lib dirs are specific to mahaleb
#
#

package metadata;

BEGIN {}

use lib qw(/export/home/nersc/Perl/lib/perl5
           /export/home/nersc/Perl/lib/perl5/site_perl);
use XML::Writer;
use IO::File;
use List::Util qw(min max);
use strict;


sub getrawncdump($) {
   # Run "ncdump -h" on a netcdf file and return the header dump.
   #
   # Input : $file - path of the netcdf file
   # Output: list with one entry for each line of the ncdump output
   # Dies  : when ncdump can not be started or gives no output
   my ( $file )  = @_;

   # The list form of a piped open hands the file name to ncdump as one
   # argument, with no shell in between. Blanks or shell metacharacters in
   # the path can therefore neither split the argument nor run as commands.
   open( my $pipe , '-|' , 'ncdump' , '-h' , $file )
      or die "Can not run ncdump on $file\n";
   my $varstring = do { local $/; <$pipe> };
   close $pipe;   # exit status is not inspected - empty output is the failure test

   # No output at all means ncdump failed
   $varstring or die "Can not run ncdump on $file\n";

   my @ncdump = split "\n" , $varstring ;
   return @ncdump;
}

sub _ncks_values {
   # Return the values of one netcdf variable as printed by ncks, one list
   # entry per value. Returns an empty list when ncks can not be started.
   my ( $file , $variable ) = @_;

   # List form of the piped open: no shell is involved, so the file name is
   # passed on untouched whatever characters it contains.
   open( my $pipe , '-|' , 'ncks' , '-s' , "%f\n" , '-H' , '-C' , '-v' , $variable , $file )
      or return ();
   my @lines = <$pipe>;
   close $pipe;
   chomp @lines;

   # Keep plain numbers only. A blank or non-numeric line would otherwise
   # count as 0 in min/max and distort the extent.
   return grep { /^\s*[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?\s*$/ } @lines;
}

sub spatialCoverage($$$) {
   # Write a 'geospatialCoverage' element describing the extent of the
   # longitude, latitude and depth variables of a netcdf file.
   #
   # Input : $file           - netcdf file to read the variables from
   #         $writer         - writer object (startTag/characters/endTag)
   #         $metatagenclose - 'yes' wraps the output in a 'metadata' element
   #
   # A variable without any values gives an empty 'start' and a 'size' of 0,
   # and a warning on STDERR.
   my ( $file, $writer , $metatagenclose )  = @_;

   # Read all three variables before anything is written
   my %extent;
   foreach my $variable (qw(longitude latitude depth)) {
      my @values = _ncks_values( $file , $variable );
      warn "Warn spatialCoverage: no values found for $variable in $file\n"
         unless @values;
      my $lowest  = min(@values);
      my $highest = max(@values);
      $extent{$variable} = [ $lowest , $highest - $lowest ];
   }

   my $enclose = ( $metatagenclose eq 'yes' );

   $writer->startTag('metadata', 'inherited' => "true") if $enclose;
   $writer->startTag('geospatialCoverage', 'zpositive' => "down");

   # One child element per axis, each with the lowest value and the extent
   foreach my $axis ( [ 'northsouth' , 'latitude'  ],
                      [ 'eastwest'   , 'longitude' ],
                      [ 'updown'     , 'depth'     ] ) {
      my ( $tag   , $variable ) = @$axis;
      my ( $start , $size     ) = @{ $extent{$variable} };

      $writer->startTag($tag);
      $writer->startTag('start');
      $writer->characters($start);
      $writer->endTag('start');
      $writer->startTag('size');
      $writer->characters($size);
      $writer->endTag('size');
      $writer->endTag($tag);
   }

   $writer->endTag('geospatialCoverage');
   $writer->endTag('metadata') if $enclose;

   return;
}

sub getvars($@) {
   # Return the variable names found in front of a search tag.
   #
   # Input : search tag (used as a pattern) followed by the lines to search
   # Output: for every line containing the tag, the text left of the tag
   #         with leading tabs removed
   my ( $searchtag , @lines ) = @_;

   my @hits = grep { /$searchtag/ } @lines;

   my @names;
   for my $line (@hits) {
      ( my $name = $line ) =~ s/^\t*// ;   # work on a copy of the line
      $name =~ s/$searchtag.*// ;          # cut the tag and all that follows
      push @names , $name ;
   }
   return @names;
}

sub getvarattribute($$@) {
   # Look up the value of a variable attribute in ncdump header output.
   #
   # Input : $varname - variable name
   #         $attrib  - attribute name
   #         @ncin    - ncdump output, one line per entry
   # Output: the attribute value without the terminating ';' and without
   #         double quotes, or undef when there is no such attribute.
   #         Always exactly one scalar, so that results pushed onto parallel
   #         lists stay aligned.
   my ( $varname , $attrib , @ncin ) =  @_;

   # Names are matched literally (\Q..\E) and the attribute name must be
   # followed by '=', so "units" does not pick up e.g. "units_old".
   my $assignment = qr/^[ \t]*\Q$varname\E:\Q$attrib\E[ \t]*=[ \t]*/;

   my $attribvalue;
   foreach my $line (@ncin) {
      next unless $line =~ $assignment;
      $attribvalue = $line;
      last;
   }
   return undef unless defined $attribvalue;

   $attribvalue =~ s/$assignment//;       # drop "name:attribute = "
   $attribvalue =~ s/[ \t]*;[ \t]*$//;    # drop the terminating ';' only
   $attribvalue =~ s/"//g;
   return $attribvalue;
}

sub gettimespan(@) {
   # Work out the time span covered by a set of file names.
   #
   # Input : file names carrying a date as "_fYYYYMMDD" in front of ".nc"
   # Output: ( start string , end string , resolution ) - start and end
   #         come from the first and the last name in sorted order
   #
   # Plain string sort is relied upon to put the names in time order
   # (filename convention).
   my @ordered = sort @_;
   my ( $first , $last ) = @ordered[ 0 , $#ordered ];

   my $hourstring = "12:00:00Z";
   my $resolution = "1 day"; # Hardcoded - may need additional routines to get this correct at all times

   my @stamps;
   foreach my $name ( $first , $last ) {
      # Isolate the date part of the file name
      ( my $datepart = $name ) =~ s/.*_f// ;
      $datepart =~ s/\.nc// ;

      my $year  = substr $datepart , 0 , 4;
      my $month = substr $datepart , 4 , 2;
      my $day   = substr $datepart , 6 , 2;

      push @stamps , "$year-$month-$day $hourstring";
   }

   return ( $stamps[0] , $stamps[1] , $resolution )
}

sub getexpiry($) {
   # Work out the expiry time of a product from its newest file.
   #
   # Input : $filebase - file name, glob pattern or directory. For a
   #                     directory the *.nc files in it and in its
   #                     sub-directories (one level down) are used.
   # Output: expiry time as "YYYY-MM-DD 12:00:00Z", which is the bulletin
   #         date ("_bYYYYMMDD" in the file name) of the last file in sorted
   #         order plus a fixed number of days
   # Dies  : when no file is found or the file name holds no bulletin date
   use File::Listing;
   use Date::Calc qw(Add_Delta_Days);

   my ( $filebase ) = @_;

   my @files=glob("$filebase");

   ### KAL -- Ugly hack starts
   # A directory is searched itself and one catalogue further down.
   # (Parsing "ls -l" output for this seems to fail on some linux machines.)
   if (-d $filebase ) {
      @files=glob("$filebase/*.nc");
      opendir( my $dirhandle , $filebase ) or die "can't read $filebase: $!";
      foreach my $entry (readdir $dirhandle) {
         # "." would list the same files once more, ".." would bring in
         # files that are not below the file base at all
         next if $entry eq '.' or $entry eq '..';
         push @files, glob("$filebase/$entry/*.nc");
      }
      closedir $dirhandle;
   }
   print "Using workaround for assumed file input in metadata::getexpiry \n";
   ### KAL -- Ugly hack ends

   #standard sort should be ok due to filename convention
   @files = sort @files or die "No file present with filebase $filebase\n";
   my $newest = $files[$#files];

   # Get time part of the filename
   my $end = $newest;
   $end   =~ s/.*_b// ;
   $end   =~ s/_f.*// ;
   $end   =~ /^(\d{4})(\d{2})(\d{2})/
      or die "No bulletin date (_bYYYYMMDD) found in file name $newest\n";
   my ( $e_year , $e_month , $e_day ) = ( $1 , $2 , $3 );

   # Get 7 Days forward in time
   my $delta_days=7;
   print "Warn getexpiry:  - delta days is hardcoded to $delta_days \n";
   my ($exp_year,$exp_month,$exp_day) = Add_Delta_Days( $e_year , $e_month, $e_day, $delta_days );

   # Add_Delta_Days gives plain numbers. Pad month and day to two digits so
   # the string has the same layout as the dates produced by gettimespan.
   my $hourstring="12:00:00Z";
   my $expirystring = sprintf "%04d-%02d-%02d %s" ,
                              $exp_year , $exp_month , $exp_day , $hourstring;

   return $expirystring;
}


sub metadata_timeCoverage($$@) {
   # Write a 'timeCoverage' element with start, end and resolution.
   #
   # Input : $writer         - writer object (startTag/characters/endTag)
   #         $metatagenclose - 'yes' wraps the output in a 'metadata' element
   #         @datestrings    - names handed on to gettimespan
   # Output: the writer object
   my ( $writer , $metatagenclose , @datestrings ) =@_;

   my $enclose = ( $metatagenclose eq 'yes' );
   my @span    = gettimespan(@datestrings);

   $writer->startTag('metadata', 'inherited' => "true") if $enclose;
   $writer->startTag('timeCoverage');

   # The child elements come in the order gettimespan returns its values
   my $position = 0;
   foreach my $tag ( 'start' , 'end' , 'resolution' ) {
      $writer->startTag($tag);
      $writer->characters( $span[ $position++ ] );
      $writer->endTag($tag);
   }

   $writer->endTag('timeCoverage');
   $writer->endTag('metadata') if $enclose;

   return $writer
}

sub agglevelproperty($$$$){
   # Write the 'aggregation-level' property tag for a dataset.
   #
   # Input : $property    - 'product', 'view' or 'organization'
   #         $metadatatag - 'yes' wraps the tag in a 'metadata' element
   #         $writer      - writer object (startTag/emptyTag/endTag)
   #         $dupath      - dataset path, only looked at for 'product'
   # Output: always undef. Nothing is written for an unknown property (a
   #         warning is printed) or for a 'product' on a path not listed.
   my ( $property , $metadatatag , $writer , $dupath ) = @_ ;

   my %known_property = map { $_ => 1 } ( 'product' , 'view' , 'organization' );

   # Only these qualify for product - first is for testing
   my %product_path = map { $_ => 1 } ( 'mersea-ip/nattest/tmipntest-class1' ,
                                        'mersea-ip/nat/tmipn-class1'         ,
                                        'mersea-ip/arctic/tmipa-class1'      );

   if ( not $known_property{$property} ) {
      print "Warn : agglevelproperty - unknown property $property\n";
      return undef ;
   }

   if ( $property eq 'product' and not $product_path{$dupath} ) {
      return undef ;
   }

   my $enclose = ( $metadatatag eq 'yes' );

   $writer->startTag('metadata','inherited' => 'true') if $enclose;
   $writer->emptyTag('property',
                     'name'  => 'aggregation-level',
                     'value' => $property);
   $writer->endTag('metadata') if $enclose;

   return undef ;
}

sub varmetadata($@){
   # Write a 'variables' element listing every variable of an ncdump header.
   #
   # Input : $writer - writer object (startTag/characters/endTag)
   #         @ncdump - ncdump output, one line per entry
   # Output: always undef
   #
   # Each variable becomes a 'variable' element with its name, standard_name
   # and units as attributes and its long_name as text.
   my ( $writer , @ncdump ) = @_ ;

   # We use ":units" to get vars ... dangerous but..
   my @entries;
   foreach my $name ( getvars( ":units" , @ncdump ) ) {
      my $longname     = getvarattribute( $name , "long_name"     , @ncdump );
      my $standardname = getvarattribute( $name , "standard_name" , @ncdump );
      my $unit         = getvarattribute( $name , "units"         , @ncdump );
      push @entries , [ $name , $standardname , $unit , $longname ];
   }

   # List variable names
   $writer->startTag('variables','vocabulary' =>  "CF-1.0");
   foreach my $entry (@entries) {
      my ( $name , $standardname , $unit , $longname ) = @$entry;
      $writer->startTag('variable','name'            =>  $name,
                                   'vocabulary_name' =>  $standardname,
                                   'units'           =>  $unit);
      $writer->characters($longname);
      $writer->endTag('variable');
   }
   $writer->endTag('variables');

   return  undef ;
}


   

###################################################################################
###################### MAIN ROUTINE - Call outside for testing#####################
###################################################################################

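# This is the main entry point: it writes the variable list and, on request,
# the spatial coverage for the first netcdf file found from the file base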
sub main_metadatagen($$$$) {
   # Write the variable metadata, and optionally the spatial coverage, of
   # the first netcdf file found from a file base.
   #
   # Input : $filebase        - file name, glob pattern or directory
   #         $metatagenclose  - 'yes' wraps the output in a 'metadata' element
   #         $writer          - writer object (startTag/characters/endTag)
   #         $spatialcoverage - 'yes' adds the spatial coverage element
   # Output: always undef
   # Dies  : when no file is found from the file base
   use File::Listing;   # not called here any more - kept so the imports of the package stay as they were

   my ( $filebase , $metatagenclose , $writer , $spatialcoverage ) = @_;

   my @files=glob("$filebase");

   ### KAL -- Ugly hack starts
   # Check first file -- if it is a directory, we try one catalogue further
   # down: the *.nc files of its last sub-directory in sorted order, or its
   # own *.nc files when it has no sub-directory. Perl file tests are used
   # because parsing "ls -l" output seems to fail on some linux machines.
   if ( @files and -d $files[0] ) {
      my $topdir = $files[0];
      opendir( my $dirhandle , $topdir ) or die "can't read $topdir: $!";
      # Hidden entries (this covers "." and "..") are left out, as a plain
      # "ls -l" does not show them either
      my @subdirs = sort grep { !/^\./ and -d "$topdir/$_" } readdir $dirhandle;
      closedir $dirhandle;

      @files = @subdirs ? glob("$topdir/$subdirs[-1]/*.nc")
                        : glob("$topdir/*.nc");
   }
   print "Using workaround for assumed file input in metadata::main_metadatagen \n";
   ### KAL -- Ugly hack ends

   @files or die "No file present with filebase $filebase\n";

   my @ncdump = getrawncdump( $files[0] ) or exit 1;

   my $enclose = ( $metatagenclose eq 'yes' );

   $writer->startTag('metadata', 'inherited' => "true") if $enclose;

   # Metadata listing variables
   varmetadata( $writer , @ncdump );

   # Spatial coverage - already inside our own enclosing tag, hence 'no'
   if ( $spatialcoverage eq 'yes' ) {
      spatialCoverage( $files[0] , $writer , 'no' );
   }

   $writer->endTag('metadata') if $enclose;

   return undef ;
}

return 1;

END {}
