/* Program name: Step4_MS_ReformatToSAS_STORET_WQ_data.sas
   Written by: Anne Hoos and Greg Schwarz 
   Date: 6/1/2005

   First part of the program reformats the water-quality data file from modernized STORET(tilde-delimited)
     to a comma-delimited file, interpreting information from several variables (characteristic name,
     sample fraction, and media) into an assignment of parameter code following the convention used 
     in Legacy STORET and in NWIS, and populating the variable RemarkCode for results below detection.
     The comma-delimited file can then be processed using the reformatting procedure that Greg Schwarz 
     developed for STORET.  
*/
/*  Second part takes the comma-delimited file containing processed data
   (sorted, Pcode assigned)from modernized-STORET output, 
   and creates a SAS file in the format (one line per sample) used by 
   Fluxmaster. Multiple stations may be included in the analysis. If a previous SAS file
   has been created, the program has the option of appending results to it. 


   Note, the program does not modify measurements with a value of zero.

   Note, if there are multiple observations of a variable on a given day, only the last value
   is retained in the SAS data set.

   The execution log file will always contain the following error message, which is harmless:
NOTE: Invalid argument to function INPUT at line 56 column 183.
RULE:     ----+----1----+----2----+----3----+----4----+----5----+----6----+----7----+----8----+--
1         Org_Code,Org_Name,Station_No,StateName,CountyName,huc8,StationTypeMod,Medium,SampleFrac
      88  tion,ValueType,StatType,ResultText,lat,lon,ActivityID,Depth,Depthunit,resultnumber,Unit
(etc.  -   it continues for about 10 more lines)

   This is generated when the program tries to make sense out of the first line of the csv file,
       which is column headings 
 
he execution log may also contain the following error message,  also harmless:

NOTE: Invalid numeric data, DetectionLimit='mg/l' , at line 3335 column 19.
ERROR: Limit set by ERRORS= option reached.  Further errors of this type will not be printed.


This is generated for observations for which the field DetText is populated only with 'mg/L'  (rather
    than beginning with a value, then blanks,  then text, such as mg/L as is the more typical format).
Because DetectionLimit is scanned as the first part of the DetText field,  detection limit for these 
cases is interpreted as 'mg/L', which causes an error when SAS attempts to read this as numeric.    
The format error is not serious, since no meaningful information about
    the detection limit was available.

BUT,  if the Invalid data for .....  warning appears in the Log for any variable other
than DetectionLimit, should check input file to make sure all 32 fields are
present.  Occasionally this warning occurs for a field (Lower Quantification Limit) 
that is expected numeric but has alphanumeric.

*/


/* ------------------------------------------------------------------------------------------ */

/* Modify the following header information each time program is run */

/* Specify the directory where the NWIS water-quality data are stored and where the SAS
   file will be created */
%let home = K:\regnawqa\MRB-2\Nut_TrendsSparrow\instreamdata\qwdata\STORET_modernized\Stationlists_Datafiles\WQData\Repull_fullparms_sept05\SFL ;

/*  VERY IMPORTANT  -    NEED TO ALSO FIND THE 'PROC EXPORT STATEMENT, ABOUT HALFWAY
            DOWN IN THE FILE,  and modify the pathname given in the OUTFILE = command so
            that it's identical to the pathname specified for home */

/* Specify the name of the raw STORET data file. */
%let STORET_rawwqdata = mem42c.txt ;

/* Specify the name of the SAS output file  -  to contain the data set with one line per sample.  */
%let SAS_output_name = Modstoret_wqdata_42c ;

/*  ALSO VERY IMPORTANT   -  When doing a series of executions on several files, suggest
       watch that each processed file is a different size from the last  (or check for fatal errors in execution log), 
       and watch that all files are larger than 17 kb (an empty file).   File size < 17 kb indicates that
       the input text file does not have the full set of fields (i.e. the proper set of
       data elements was not requested during retrieval)*/
/* ------------------------------------------------------------------------------------------ */
/*   PART I   */

libname dir "&home" ;
filename indata "&home.\&STORET_rawwqdata" ;

/* Delete files from previous runs (can confuse the issue of whether execution was successful)*/

    proc datasets library = work nolist ;
	  delete Input_data ;
	  delete temp1 ;
	  delete temp2 ;
	  delete temp3 ;
	run ;

data work.temp1;
  infile indata dsd delimiter = '~' missover lrecl = 1024 ;  
  length  Org_Code $ 8 Org_Name $ 52 Station_No $ 16 /* station_name $ 60 */ StateName CountyName $ 30
   huc8  $ 10 StationTypeMod $ 1 Medium $ 5 ActivityStart $ 19 ActivityCat $14 visit_num $ 5
    CharacName $ 50    SampleFraction $ 12  ValueType $ 6 StatType $ 5 ResultText $ 12 AnalytMethod $ 30  DetText $ 23
	
/*  Note - only define length for character variables of interest  -  
	    (how are other character variables read? (i.e. what is default)) */
;

/* Skip over first line */
   if count <= 0 then do ;
        input @1 first_char $1 ;
        count +  1 ;
      end ;
      else do ;

input 
Org_Code $ Org_Name $ Station_No  $ /* station_name $ */ StateName $ CountyName $ huc8 $ Genhuc8 $ lat lon horiz_datum $ StationTypeMod $ 
 visit_num ActivityID $ ActivityStart $ TimeZone $ Medium $ ActivityType $ ActivityCat $ Depth Depthunit $ 
 CharacName $ SampleFraction $ ValueType $ StatType $ ResultText $ resultnumber Units $ 
 Convresultnumber ConvUnits $ AnalytMethod $ DetText  DetLimitDescrip $ LowerQuantLimit UpperQuantLimit;
  end ;
  run;

  data work.temp2 ;
  set work.temp1 ;
 /* Delete first line (identified because the first_char variable is populated) */
if first_char = 'O' then delete ;

/* Parse out ActivityStart into date and time */
  length timehr $ 2 timemin $ 2 date $ 10 ;
  date=scan(ActivityStart, 1, ' ');
  timehrminsec=scan(ActivityStart, 2, ' ');
  timehr=scan(timehrminsec, 1, ':');
  timemin=scan(timehrminsec, 2, ':');
  time=timehr!!timemin ;
  datetime = date!!'.'!!time ;

  station_id = Org_Code!!'.'!!Station_No ;
  sample_id = station_id!!'.'!!datetime ;

  /* Convert Characteristic Name to the standard NWIS parameter code.   
      The conversions assume as default that all results are reported in mg/L unless
      the Units field is populated as ug/L. (In other words, if the Units field not populated 
      for a result, assume it's mg/L)
      Although this is risky, the Units field is frequently not populated.
      For example, noticed in one test retrieval (lat/long block in Georgia) that when result 
      is below detection (i.e. when ResultText = "*Non-detect"  no units are specified,
      so insisting on verifying that Unit was populated as 'mg/L' would cause loss of all the 
      non-detect data.  (A later processing step will check for instances where mg/L was wrongly assumed 
      for a ug/L result (impossibly large numbers).  
      Adding the criteria that Medium = water will rule out the possibility of misinterpreting
      values for some nutrient parameter code other than concentration in water. (This field is universally
      populated)

      Have also made some assumptions about sample fraction when that field not populated; 
      As with the Units field, Sample Fraction is not populated for a substantial number of observations; hence failure to translate to parmcode 
       for these observation would create a huge loss of data.  
      The assumptions were based on a survey of the data.  For example, when the sample fraction field for 
      "Nitrogen, ammonia (NH3) + ammonium (NH4)" isnot blank it contains 'Dissolved' or 'Filterable' 
       far more frequently than 'total'.         
 */


/* Total Nitrogen  -  translated to 00600*/
If CharacName = 'Nitrogen ion (N)' and SampleFraction = 'Total' and Medium = 'Water'  then pcode = '00600' ;

/* Ammonia characteristics - translated to 00608 00610 01845 01846  */
/* Assume 'Nitrogen, ammonia (NH3) + ammonium (NH4)' reported as N */
If CharacName = 'Nitrogen, ammonia (NH3) + ammonium (NH4)' and SampleFraction = 'Total' and Medium = 'Water' then pcode = '00610' ; 
If CharacName = 'Nitrogen, ammonia (NH3) + ammonium (NH4)' and SampleFraction = 'Acid Soluble' and Medium = 'Water' then pcode = '00610' ; 
If CharacName = 'Nitrogen, ammonia (NH3) + ammonium (NH4)' and SampleFraction = 'Fixed' and Medium = 'Water' then pcode = '00610' ; 
If CharacName = 'Nitrogen, ammonia (NH3) + ammonium (NH4)' and SampleFraction = 'Dissolved' and Medium = 'Water' then pcode = '00608' ; 
If CharacName = 'Nitrogen, ammonia (NH3) + ammonium (NH4)' and SampleFraction = 'Filterable' and Medium = 'Water' then pcode = '00608' ; 
If CharacName = 'Nitrogen, ammonia (NH3) + ammonium (NH4)' and SampleFraction = '' and Medium = 'Water' then pcode = '00608' ; /* If sample fraction missing, interpret as dissolved */

If CharacName = 'Nitrogen, ammonia as N' and SampleFraction = 'Total' and Medium = 'Water' then pcode = '00610' ; 
If CharacName = 'Nitrogen, ammonia as N' and SampleFraction = 'Acid Soluble' and Medium = 'Water' then pcode = '00610' ; 
If CharacName = 'Nitrogen, ammonia as N' and SampleFraction = 'Fixed' and Medium = 'Water' then pcode = '00610' ; 
If CharacName = 'Nitrogen, ammonia as N' and SampleFraction = 'Filterable' and Medium = 'Water' then pcode = '00608' ; 
If CharacName = 'Nitrogen, ammonia as N' and SampleFraction = 'Dissolved' and Medium = 'Water' then pcode = '00608' ; 
If CharacName = 'Nitrogen, ammonia as N' and SampleFraction = '' and Medium = 'Water' then pcode = '00608' ; /* If sample fraction missing, interpret as dissolved */

If CharacName = 'Nitrogen, ammonia (NH3) as NH3' and SampleFraction = 'Total' and Medium = 'Water' then pcode = '01845' ; 
/* Match for this characteristic name should be 71845 or 71846, but 
   assign a dummy code so can identify these data as from modernized STORET.  Suspect that many agencies are
   inadvertently storing ammonia as N data in Modernized under this Characteristic Name */
If CharacName = 'Nitrogen, ammonia (NH3) as NH3' and SampleFraction = 'Acid Soluble' and Medium = 'Water' then pcode = '01845' ; 
If CharacName = 'Nitrogen, ammonia (NH3) as NH3' and SampleFraction = 'Fixed' and Medium = 'Water' then pcode = '01845' ; 
If CharacName = 'Nitrogen, ammonia (NH3) as NH3' and SampleFraction = 'Dissolved' and Medium = 'Water' then pcode = '01846' ; 
If CharacName = 'Nitrogen, ammonia (NH3) as NH3' and SampleFraction = 'Filterable' and Medium = 'Water' then pcode = '01846' ; 
If CharacName = 'Nitrogen, ammonia (NH3) as NH3' and SampleFraction = '' and Medium = 'Water' then pcode = '01845' ; /* If sample fraction missing, interpret as total */

/* Nitrate characteristics - translated to 00630 00631 00620 00618 01850 01851 */
If CharacName = 'Nitrogen, Nitrite (NO2) + Nitrate (NO3) as N' and SampleFraction = 'Total' and Medium = 'Water' then pcode = '00630' ; 
If CharacName = 'Nitrogen, Nitrite (NO2) + Nitrate (NO3) as N' and SampleFraction = 'Acid Soluble' and Medium = 'Water' then pcode = '00630' ; 
If CharacName = 'Nitrogen, Nitrite (NO2) + Nitrate (NO3) as N' and SampleFraction = 'Fixed' and Medium = 'Water' then pcode = '00630' ; 
If CharacName = 'Nitrogen, Nitrite (NO2) + Nitrate (NO3) as N' and SampleFraction = 'Dissolved' and Medium = 'Water' then pcode = '00631' ; 
If CharacName = 'Nitrogen, Nitrite (NO2) + Nitrate (NO3) as N' and SampleFraction = 'Filterable' and Medium = 'Water' then pcode = '00631' ; 
If CharacName = 'Nitrogen, Nitrite (NO2) + Nitrate (NO3) as N' and SampleFraction = '' and Medium = 'Water' then pcode = '00630' ; /* If sample fraction missing, interpret as total */

If CharacName = 'Nitrogen, Nitrate (NO3) as N' and SampleFraction = 'Total' and Medium = 'Water' then pcode = '00620' ; 
If CharacName = 'Nitrogen, Nitrate (NO3) as N' and SampleFraction = 'Acid Soluble' and Medium = 'Water' then pcode = '00620' ; 
If CharacName = 'Nitrogen, Nitrate (NO3) as N' and SampleFraction = 'Fixed' and Medium = 'Water' then pcode = '00620' ; 
If CharacName = 'Nitrogen, Nitrate (NO3) as N' and SampleFraction = 'Dissolved' and Medium = 'Water' then pcode = '00618' ; 
If CharacName = 'Nitrogen, Nitrate (NO3) as N' and SampleFraction = 'Filterable' and Medium = 'Water' then pcode = '00618' ; 
If CharacName = 'Nitrogen, Nitrate (NO3) as N' and SampleFraction = '' and Medium = 'Water' then pcode = '00618' ; /* If sample fraction missing, interpret as dissolved */


If CharacName = 'Nitrogen, Nitrate (NO3) as NO3' and SampleFraction = 'Total' and Medium = 'Water' then pcode = '01850' ; 
/* Match for this characteristic name should be 71850 or 71851, but 
   assign a dummy code so can identify these data as from modernized STORET.  Suspect that many agencies are
   inadvertently storing nitrate as N data in Modernized under this Characteristic Name */
If CharacName = 'Nitrogen, Nitrate (NO3) as NO3' and SampleFraction = 'Acid Soluble' and Medium = 'Water' then pcode = '01850' ; 
If CharacName = 'Nitrogen, Nitrate (NO3) as NO3' and SampleFraction = 'Fixed' and Medium = 'Water' then pcode = '01850' ; 
If CharacName = 'Nitrogen, Nitrate (NO3) as NO3' and SampleFraction = 'Dissolved' and Medium = 'Water' then pcode = '01851' ; 
If CharacName = 'Nitrogen, Nitrate (NO3) as NO3' and SampleFraction = 'Filterable' and Medium = 'Water' then pcode = '01851' ; 
If CharacName = 'Nitrogen, Nitrate (NO3) as NO3' and SampleFraction = '' and Medium = 'Water' then pcode = '01850' ; /* If sample fraction missing, interpret as total  */
 
/* Kjeldahl (ammonia plus organic) characteristics - translated to 00625 00623 00624 00635 */
/* Assume these are reported as N */
If CharacName = 'Nitrogen, Kjeldahl' and SampleFraction = 'Total' and Medium = 'Water' then pcode = '00625' ;  
If CharacName = 'Nitrogen, Kjeldahl' and SampleFraction = 'Acid Soluble' and Medium = 'Water' then pcode = '00625' ;  
If CharacName = 'Nitrogen, Kjeldahl' and SampleFraction = 'Fixed' and Medium = 'Water' then pcode = '00625' ;  

If CharacName = 'Nitrogen, Kjeldahl' and SampleFraction = 'Dissolved' and Medium = 'Water' then pcode = '00623' ;  
If CharacName = 'Nitrogen, Kjeldahl' and SampleFraction = 'Filterable' and Medium = 'Water' then pcode = '00623' ;  
If CharacName = 'Nitrogen, Kjeldahl' and SampleFraction = 'Suspended' and Medium = 'Water' then pcode = '00624' ;  
If CharacName = 'Nitrogen, Kjeldahl' and SampleFraction = '' and Medium = 'Water' then pcode = '00625' ;  /*Interpret all analyses are for total */

If CharacName = 'Nitrogen, ammonia (NH3)+ organic'  and Medium = 'Water' then pcode = '00635' ; /*Assume all analyses are for total - not sure if any data for this anyway */
If CharacName = 'Nitrogen, Ammonia + Organic'  and Medium = 'Water' then pcode = '00635' ; /*Assume all analyses are for total - new version of name Oct. 9, 2005 */

/* Organic nitrogen characteristics  -  translated to 00605, 00606, or 00607 */
If CharacName = 'Nitrogen, organic'  and SampleFraction = 'Total' and Medium = 'Water' then pcode = '00605' ; 
If CharacName = 'Nitrogen, organic'  and SampleFraction = 'Suspended' and Medium = 'Water' then pcode = '00606' ; 
If CharacName = 'Nitrogen, organic'  and SampleFraction = 'Dissolved' and Medium = 'Water' then pcode = '00607' ; 
If CharacName = 'Nitrogen, organic'  and SampleFraction = '' and Medium = 'Water' then pcode = '06050' ; /* If sample fraction missing, interpret as a version of total  */



/*Assume all analyses are for total - not sure if any data for this anyway */

/* Phosphorus characteristics - translated to 00665 00666 00667 06550 70507 00671 71886 00660 */
If CharacName = 'Phosphorus as P' and SampleFraction = 'Total' and Medium = 'Water' then pcode = '00665' ;
If CharacName = 'Phosphorus as P' and SampleFraction = 'Acid Soluble' and Medium = 'Water' then pcode = '00665' ;
If CharacName = 'Phosphorus as P' and SampleFraction = 'Fixed' and Medium = 'Water' then pcode = '00665' ;
If CharacName = 'Phosphorus as P' and SampleFraction = 'Dissolved' and Medium = 'Water' then pcode = '00666' ;
If CharacName = 'Phosphorus as P' and SampleFraction = 'Filterable' and Medium = 'Water' then pcode = '00666' ;
If CharacName = 'Phosphorus as P' and SampleFraction = 'Suspended' and Medium = 'Water' then pcode = '00667' ;
If CharacName = 'Phosphorus as P' and SampleFraction = 'Non-filterable' and Medium = 'Water' then pcode = '00667' ;

/* For Phosphorus as P, there's really no reasonable basis for assuming that an observation missing sample fraction information is
   either total or dissolved.  And unfortunately,  many of observations for Phosphorus as P have no information about sample fraction.
   To avoid losing data, assign a dummy variable (06650) and consider course of action .   */  
If CharacName = 'Phosphorus as P' and SampleFraction = '' and Medium = 'Water' then pcode = '06650' ; 

If CharacName = 'Phosphorus' and SampleFraction = 'Total' and Medium = 'Water' then pcode = '00665' ; 
If CharacName = 'Phosphorus' and SampleFraction = 'Acid Soluble' and Medium = 'Water' then pcode = '00665' ; 
If CharacName = 'Phosphorus' and SampleFraction = 'Fixed' and Medium = 'Water' then pcode = '00665' ; 
If CharacName = 'Phosphorus' and SampleFraction = 'Dissolved' and Medium = 'Water' then pcode = '00666' ; 
If CharacName = 'Phosphorus' and SampleFraction = 'Filterable' and Medium = 'Water' then pcode = '00666' ; 
If CharacName = 'Phosphorus' and SampleFraction = 'Suspended' and Medium = 'Water' then pcode = '00667' ; 
If CharacName = 'Phosphorus' and SampleFraction = 'Non-filterable' and Medium = 'Water' then pcode = '00667' ; 

/* For Phosphorus, there's really no reasonable basis for assuming that an observation missing sample fraction information is
   either total or dissolved.  
   To avoid losing data, assign a dummy variable (06650) and consider course of action .   */  
If CharacName = 'Phosphorus' and SampleFraction = '' and Medium = 'Water' then pcode = '06650' ; 

If CharacName = 'Phosphorus, orthophosphate as P' and SampleFraction = 'Total' and Medium = 'Water'  then pcode = '70507' ; 
If CharacName = 'Phosphorus, orthophosphate as P' and SampleFraction = 'Acid Soluble' and Medium = 'Water'  then pcode = '70507' ; 
If CharacName = 'Phosphorus, orthophosphate as P' and SampleFraction = 'Fixed' and Medium = 'Water'  then pcode = '70507' ; 
If CharacName = 'Phosphorus, orthophosphate as P' and SampleFraction = 'Dissolved' and Medium = 'Water'  then pcode = '00671' ; 
If CharacName = 'Phosphorus, orthophosphate as P' and SampleFraction = 'Filterable' and Medium = 'Water'  then pcode = '00671' ; 
If CharacName = 'Phosphorus, orthophosphate as P' and SampleFraction = '' and Medium = 'Water'  then pcode = '00671' ; /* If sample fraction missing, interpret as dissolved */

If CharacName = 'Phosphorus, orthophosphate as PO4' and SampleFraction = 'Total' and Medium = 'Water'  then pcode = '71886' ; 
If CharacName = 'Phosphorus, orthophosphate as PO4' and SampleFraction = 'Acid Soluble' and Medium = 'Water'  then pcode = '71886' ; 
If CharacName = 'Phosphorus, orthophosphate as PO4' and SampleFraction = 'Fixed' and Medium = 'Water'  then pcode = '71886' ; 
If CharacName = 'Phosphorus, orthophosphate as PO4' and SampleFraction = 'Dissolved' and Medium = 'Water'  then pcode = '00660' ; 
If CharacName = 'Phosphorus, orthophosphate as PO4' and SampleFraction = 'Filterable' and Medium = 'Water'  then pcode = '00660' ; 
If CharacName = 'Phosphorus, orthophosphate as PO4' and SampleFraction = '' and Medium = 'Water'  then pcode = '00660' ; /* If sample fraction missing, interpret as dissolved */

/* Flow and sediment */
If CharacName = 'Total Suspended Solids (TSS)' and Medium = 'Water'  then pcode = '80154' ;
If CharacName = 'Solids, Total Suspended (TSS)' and Medium = 'Water'  then pcode = '80154' ; 
If CharacName = 'Flow' then pcode = '00061' ;


/* Convert remark code from modernized to NWIS style -  values below detection limit are reported as comment '*Non-detect' 
  (or occasionally, '*Present <QL') in the ResultText field.   */

If ResultText = '*Non-detect' then RemarkCode = 'U' ; 

 If ResultText = '*Present <QL' and resultnumber = <0.0 then RemarkCode = '<' ; 
 If ResultText = '*Present <QL' and resultnumber = '.' then RemarkCode = '<' ; 
 /* Found this for some results (ammonia, most commonly)where resultnumber reported as zero, and detection limit not populated. 
    Need to treat these as less thans and fill in detection limit somehow.  But in one case found with
    resultnumber populated as an actual positive number - treat these as 'E' instead */
If ResultText = '*Present <QL' and resultnumber > 0.0 then RemarkCode = 'E' ;  


  If ResultText = '*Present<QL' and resultnumber <= 0.0 then RemarkCode = '<' ; /*Also found it without the space before < */
  If ResultText = '*Present<QL' and resultnumber = '.' then RemarkCode = '<' ; 
  If ResultText = '*Present<QL' and resultnumber > 0.0 then RemarkCode = 'E' ;  

/* Noticed some instances of nondetect being indicated with a negative number
   in the ResultText field.  These will not be captured as 'U' with the above text.
   It appears that resultnumber is also recorded as negative number in these cases,
   so can recognize it as nondetect that way.  (Easier than looking for a negative
   number in the ResultText field, which is a text field) */
  
  If resultnumber < 0.0 then RemarkCode = 'U'; /* This may overwrite '<', but not serious since they have the same meaning */

/* Interpret detection limit from the DetText field */

DetectionLimit = scan(DetText, 1, ' ') ; 
DetLimNum = 1.0 * DetectionLimit ;
 /* The field DetText is a text field, and in most cases (based on test retreivals) 
 where this field is populated - it is composed of the numeric value
 of detection limit, then several spaces, then the units (e.g. mg/L). Don't bother 
 about scanning the units part of this text string - assume 
 will be in same units as result. 
Occasionally, though, it's populated only with the units information, without a number, and
 in these cases, the scanning of DetText will produce a series of letters rather than
 a number.  These cases will in turn
 cause errors in SAS Log when DetectionLimit is forced to numeric.  These
 errors are harmless.  In some of these cases (i.e. where no numeric
 value available in DetText) can salvage detection-limit-like
 information from LowerQuantLimit or other sources - this is done in Part II */

/* For Flow parameter, convert mgd or m3/sec or gal/min to cfs */
If Units = 'mgd' or Units = 'MGD' then resultnumber = resultnumber * 1.547 ;
If Units = 'cms' or Units = 'CMS' or Units = 'm3/sec' then resultnumber = resultnumber * 35.32 ;
If Units = 'gal/min' then resultnumber = resultnumber * .002228 ;

/* Delete result when units given as tons/day (found this value for some stations in the NFL group) and g/sec
    (found in station in Va block)-
      not easily convertible back to concentration so just drop.  Presumably the concentration
      result is also given for the sample so can reconstruct load */
If Units = 'tons/day' then delete ;
If Units = 'g/sec' then delete ;

/* For nutrient parm codes, convert result (and detection limit/quantification limits) to mg/L 
     if reported in ug/L or umol (note: mg, found in Miss_AL block, is assumed to mean mg/l).
     Some characteristic names are not
     explicit about whether values reported as compound or elemental (N or P) mass - assume as
     elemental mass if not specified otherwise */
If Units = 'ug/l' or Units = 'UG/L' or Units = 'ug/L' or Units = 'ppb' or Units = 'mg/m3' then do ;
resultnumber = resultnumber * 0.001;
DetLimNum = DetLimNum * 0.001;
LowerQuantLimit = LowerQuantLimit * 0.001 ;
UpperQuantLimit = UpperQuantLimit * 0.001 ;  end;
If Units = 'umol' and CharacName = 'Nitrogen ion (N)' then do ;
resultnumber = resultnumber * 0.0140 ;
DetLimNum = DetLimNum * 0.0140;
LowerQuantLimit = LowerQuantLimit * 0.0140 ;
UpperQuantLimit = UpperQuantLimit * 0.0140 ;  end;
If Units = 'umol' and CharacName = 'Nitrogen, ammonia (NH3) + ammonium (NH4)' then do ;
resultnumber = resultnumber * 0.0140 ;
DetLimNum = DetLimNum * 0.0140;
LowerQuantLimit = LowerQuantLimit * 0.0140 ;
UpperQuantLimit = UpperQuantLimit * 0.0140 ;  end;
If Units = 'umol' and CharacName = 'Nitrogen, ammonia as N' then do ;
resultnumber = resultnumber * 0.0140 ;
DetLimNum = DetLimNum * 0.0140;
LowerQuantLimit = LowerQuantLimit * 0.0140 ;
UpperQuantLimit = UpperQuantLimit * 0.0140 ;  end;
If Units = 'umol' and CharacName = 'Nitrogen, ammonia (NH3) as NH3' then do ;
resultnumber = resultnumber * 0.0170 ;
DetLimNum = DetLimNum * 0.0170;
LowerQuantLimit = LowerQuantLimit * 0.0170 ;
UpperQuantLimit = UpperQuantLimit * 0.0170 ;  end;
If Units = 'umol' and CharacName = 'Nitrogen, Nitrite (NO2) + Nitrate (NO3) as N' then do ;
resultnumber = resultnumber * 0.0140 ;
DetLimNum = DetLimNum * 0.0140;
LowerQuantLimit = LowerQuantLimit * 0.0140 ;
UpperQuantLimit = UpperQuantLimit * 0.0140 ;  end;
If Units = 'umol' and CharacName = 'Nitrogen, Nitrate (NO3) as N' then do ;
resultnumber = resultnumber * 0.0140 ;
DetLimNum = DetLimNum * 0.0140;
LowerQuantLimit = LowerQuantLimit * 0.0140 ;
UpperQuantLimit = UpperQuantLimit * 0.0140 ;  end;
If Units = 'umol' and CharacName = 'Nitrogen, Nitrate (NO3) as NO3' then do ;
resultnumber = resultnumber * 0.0620 ;
DetLimNum = DetLimNum * 0.0620;
LowerQuantLimit = LowerQuantLimit * 0.0620 ;
UpperQuantLimit = UpperQuantLimit * 0.0620 ;  end;
If Units = 'umol' and CharacName = 'Nitrogen, Kjeldahl' then do ;
resultnumber = resultnumber * 0.0140 ;
DetLimNum = DetLimNum * 0.0140;
LowerQuantLimit = LowerQuantLimit * 0.0140 ;
UpperQuantLimit = UpperQuantLimit * 0.0140 ;  end;
If Units = 'umol' and CharacName = 'Nitrogen, ammonia (NH3)+ organic' then do ;
resultnumber = resultnumber * 0.0140 ;
DetLimNum = DetLimNum * 0.0140;
LowerQuantLimit = LowerQuantLimit * 0.0140 ;
UpperQuantLimit = UpperQuantLimit * 0.0140 ;  end;
If Units = 'umol' and CharacName = 'Nitrogen, organic' then do ;
resultnumber = resultnumber * 0.0140 ;
DetLimNum = DetLimNum * 0.0140;
LowerQuantLimit = LowerQuantLimit * 0.0140 ;
UpperQuantLimit = UpperQuantLimit * 0.0140 ;  end;
If Units = 'umol' and CharacName = 'Phosphorus as P' then do ;
resultnumber = resultnumber * 0.0308 ;
DetLimNum = DetLimNum * 0.0308;
LowerQuantLimit = LowerQuantLimit * 0.0308 ;
UpperQuantLimit = UpperQuantLimit * 0.0308 ;  end;
If Units = 'umol' and CharacName = 'Phosphorus' then do ;
resultnumber = resultnumber * 0.0308 ;
DetLimNum = DetLimNum * 0.0308;
LowerQuantLimit = LowerQuantLimit * 0.0308 ;
UpperQuantLimit = UpperQuantLimit * 0.0308 ;  end;
If Units = 'umol' and CharacName = 'Phosphorus, orthophosphate as P' then do ;
resultnumber = resultnumber * 0.0308 ;
DetLimNum = DetLimNum * 0.0308;
LowerQuantLimit = LowerQuantLimit * 0.0308 ;
UpperQuantLimit = UpperQuantLimit * 0.0308 ;  end;
If Units = 'umol' and CharacName = 'Phosphorus, orthophosphate as PO4' then do ;
resultnumber = resultnumber * 0.0949 ;
DetLimNum = DetLimNum * 0.0949;
LowerQuantLimit = LowerQuantLimit * 0.0308 ;
UpperQuantLimit = UpperQuantLimit * 0.0308 ;  end;

/* Found one case of the unit ml/l for a nutrient (ammonia) - assume
     this was a typo? (don't try to recode) */

 /* Handle case of 'greater thans'.  Fill in result number (if missing) with
       the upper quantification limit.  (This can be handled now rather than waiting
       until Part II -  whereas filling in resultnumber with detection limit has to wait
       until Part II because of the 'force to numeric' issue  */ 
 If ResultText = '*Present >QL' then RemarkCode = '>' ; 
 If RemarkCode = '>' and resultnumber = '.' then resultnumber = UpperQuantLimit ;


run ;
/*  This part inserted after October 4 change in modernized STORET:
Delete QA/QC samples from record  (since Oct. 4, 2005,  retrievals from
 modernized STORET include these samples, but these are identified as ActivityCat something
 other than 'Routine Sample' */

data work.temp2a ;
set work.temp2 ;
If ActivityCat = 'Routine Sample' ;
run;

data work.temp3 ;
set work.temp2a ;
/*  Drop unnecessary fields - and drop text fields with commas(org_name station_name CharacName)
         because the commas may interfere with the csv format */
   keep Org_Code Org_Name Station_No station_id sample_id StateName CountyName 
    lat lon huc8 StationTypeMod Depth DepthUnit Medium ActivityID date time pcode 
    resultnumber RemarkCode ResultText SampleFraction Units ValueType StatType DetLimNum LowerQuantLimit ;

	/* Drop records for which pcode not populated (reformatting code could handle it, but since I'm dropping Characteristic Name
	     there's no information identifying the parameter and thus no reason to save ) */
	 if pcode < 1 then delete ; 
  run ;

proc sort data = work.temp3 ;
  by sample_id ;

  run;
/* Output data to comma-delimited file  -  which will be reformatted to one line per sample in a separate program */
  /*  IMPORTANT:  make sure that the pathname in the OUTFILE command matches the pathname for the home library
       (set at the beginning of the file */ 
PROC EXPORT DATA= work.temp3 
            OUTFILE= "K:\regnawqa\MRB-2\Nut_TrendsSparrow\instreamdata\qwdata\STORET_modernized\Stationlists_Datafiles\WQData\Repull_fullparms_sept05\SFL\outcsvfile.csv" 
            DBMS=csv REPLACE;
RUN;

/*  Part II  
*/

/* Specify the names of the WQ input file  */
%let WQ_input_filenames = outcsvfile.csv ;


/* Specify the list of WQ parameters on the file that you wish to retain. If the input data files
   do not contain header information then the listing of the parameters is the order in which they
   are entered on the file. */
%let parm_list = P00600 P00610 P00615 P00620 P00625 P00630 P00631 P00665 ;
%let parm_list = P00600 
 P00608 P00610 P01845 P01846 /* ammonia codes */
 P00630 P00631 P00620 P00618 P01850 P01851 /* nitrate codes */
 P00625 P00623 P00624 P00635 /* kjeldahl codes */
 /* P00605 P00606 P00607 P06050 */   /* organic nitrogen codes  -   decided not to bother with these in
     a few test cases made after October 2005 (when I first realized I should have been retrieving these
     data from STORET)  because the data were relatively sparse,  and most observations did not have
     sample fraction or if the did, didn't also have 00610, so I didn't feel I could use these to much advantage.    But in a future wholesale
     re-retrieval of these data,  should keep the P00605 values, and where kjeldahl is absent,  add values to
     P00610, if present, to compute kjeldahl  */
 P00665 P00666 P00667 P06550 P70507 P00671 P71886 P00660 /* phosphorus and orthophosphorus codes */
   P00061 P80154 ; /* flow and suspended sediment */

/* Specify the default detection levels for each parameter in same order as above (and in the same units).
   Enter a . as a placeholder for unknown detection level. */
%let detect_list = 1 . . . . . . .01 ;
%let detect_list = . . . . . . . . . . . . . . . . . . . . . . . . .;

/* ------------------------------------------------------------- */

libname dir "&home" ;



/* ---------------------------------------------------------------------- */

/* Macro counts the number of objects separated by a blank in a list */

%macro count(list) ;

  %local i_list var ;
  %let i_list = 1 ;
  %let var = %scan(&list,&i_list,' ') ;
  %do %while (%length(&var) > 0) ;
    %let i_list = %eval(&i_list + 1) ;
    %let var = %scan(&list,&i_list,' ') ;
  %end ;
  %eval(&i_list - 1)

%mend count ;

/* ---------------------------------------------------------------------- */

%macro set_wq_data_all ;

  %if %upcase(%scan(&SAS_output_name,2,' ')) ^= APPEND %then %do ;
    proc datasets lib = dir nolist ;
      delete &SAS_output_name ;
    run ;
  %end ;
  %let SAS_output_name = %scan(&SAS_output_name,1,' ') ;

  %let rparm_list = ;
  %let dparm_val_list = ; 
  %let dparm_list = ;
  %let i_parm = 1 ;
  %do i_parm = 1 %to %count(&parm_list) ;
    %let parm = %scan(&parm_list,&i_parm,' ') ;
    %let rparm_list = &rparm_list R%substr(&parm,2) ;
    %let dparm_val_list = &dparm_val_list D%substr(&parm,2) %scan(&detect_list,&i_parm,' ') ;
    %let dparm_list = &dparm_list D%substr(&parm,2) ;
  %end ;

  %do i_file = 1 %to %count(&wq_input_filenames) ;

    filename indata "&home.\%scan(&wq_input_filenames,&i_file,' ')" ;

    data input_data (keep = station_id date time &parm_list &rparm_list &dparm_list agency /* samp_cd comp_stat_cd */) ;
 /* Note - decided not to keep samp_cd and comp_stat_cd in the reformatted file - these don't contain information
	              needed for running fluxmaster.   */
    attrib 
  /* First define length for character attributes that we need in order to reformat the data file for fluxmaster */
             agency inagency length = $8
             station_id instation_id length = $15 
             date informat = mmddyy10. format = mmddyy10. 
             indate informat = yymmdd10. format = mmddyy10. 
             intime length = $4
             time informat = time5. format = time5.
             parameter_cd length = $5
			 remark_cd length = $1

    /* Define length for any character attributes that we're not sure whether we need to bother with - */
             samp_cd length = $12 /* A unique sample code number - read from the modernized variable Activity ID */
			 StatType length = $5 /* Doesn't seem to be populated in modernized STORET */
			 ValueType length = $6 /* Supposedly indicates whether value is actual, estimated, calculated */

	/* Attributes that we won't bother with - read in as character with length = $1 */
             org_name state county huc StationTypeMod Medium SampleFraction ResultText
             lat lon  samp_depth Depthunit 
             Units 
             Concatstation_id Concatsample_id length = $1
    ; 
      format date mmddyy10. ;
      retain count 0 contains_data 0 station_id '' date . time . &parm_list . &rparm_list ' ' &dparm_val_list agency '' /* samp_cd ' ' comp_stat_cd ' ' */ ;
   /* Note: changed response for samp_cd in above command to from . to ' ' to correspond with switch to character */


      infile indata dsd delimiter = ',' missover lrecl = 1000 end = the_end ;

   /* Although it would be nice to skip reading in the first record of the data set (which contains the
	   attribute names), I can't figure out how to make the following code work because the initial value for count is apparently not 
	   zero anymore, and attempts to set it back to zero don't work.
	   Can't figure out how to fix the problem, but it results in reading error (a single
	   occurrence of the message in the log file :
	   "NOTE: Invalid argument to function INPUT at line 56 column 183."
       so leave as is. */
        if count <= 0 then do ;
        input @1 first_char $1 ;
        count + 1 ;
      end ;
      else do ;
/* This next section handles 'bad character' at beginning of line - not really an issue for modernized STORET, but
	      leave it in anyway */
      input @1 first_char $1 @ ;
        if first_char = byte(12) then start = 2 ;
        else start = 1 ;

        inputdat: input @start inagency org_name instation_id  state county huc 
           StationTypeMod Medium SampleFraction ValueType StatType ResultText lat lon 
           samp_cd samp_depth Depthunit
           result_va Units LowerQuantLimit indate intime Concatstation_id Concatsample_id
           parameter_cd remark_cd DetectionLimit  ;
/* Comments on equivalency between modernized and legacy (and NWIS):  
	Activity ID (modernized) is somewhat equivalent to Sample Code (legacy), although Activity ID
is alphanumeric rather than numeric.
	S/G/O Indicator (modernized - I renamed variable to StationTypeMod) should always be S (since we requested only
		   surface-water stations) and therefore can be considered equivalent to  SW_ind (legacy)
     Medium and SampleFraction and Units have no equivalent in legacy (this info part of pcode) 
     Statistic Type  (modernized) - I assume this is equivalent to "Composite Statistic Code" (legacy)
		 Glossary definition of Statistic Type: A statistic or calculation type which describes the reported result 
		  (e.g., average, mode, median, MPN - and note, could also be 'Actual'). 
	 ResultText (modernized) has no equivalent in legacy (it contains either the value of *Non-detect)
	    the non-detect info has been coded already as remark code U  
	 Lower Quantification Limit (modernized) has no equivalent in legacy)
*/
/* Comments on some variables calculated from modernized variables (in preprocessing)
	 Concatstation_id (concatenated agency and station id), was calculated after processing for 
		  legacy data, whereas with modernized it had to be calculated prior to reformatting 
		  in order to arrange the records in proper order (grouped by sample)
	 Concatsample_id (concatenation of agency, station, date, time),
		  was calculated after processing for legacy data, where with modernized it had to be
		  calculated prior to reformatting in order to arrange the records in proper order (grouped by sample)
     DetectionLimit (populated as numeric during preprocessing whenever Detection Limit (alphanumeric)
		   contained a value)
		   but note that not all Non-detect results have a value stored for Detection Limit) 
		   No equivalent to legacy because in legacy (and NWIS) the detection limit for an observatin below
		   detection is stored as "Result Value" and is not stored separately as a value, whereas in 
		   modernized STORET, the detection limit for an observation below detection is occasionally stored
		   as Detection Limit (alphanumeric - a text string with number and unit), and the field
		   "Result as Number" is left blank.
		   Will need to alter reformatting code to handle this difference for modernized:
		   when Result Value as number is missing and remark code is U and DetectionLimit is not missing,
		   then set Result Value to Detection_Limit. 
		   when Result Value as number is missing and remark code is U and DetectionLimit is missing, then
		     figure out something else to do, perhaps as part of Fluxmaster to synthesize a detection limit
		     (and thus fill in a value for Result Value)  
			  */
        if contains_data and the_end then do ;
          output input_data ;
          stop ;
        end ;
/* Next line handles case of blank lines in input file (not a problem in modernized, but keep anyway) */
        if inagency = '' then goto inputdat ;

        if contains_data = 1 and (indate ^= date or station_id ^= instation_id) then do ;
          output input_data ;
          contains_data = 0 ;
          array parm_list {*} &parm_list ;
          array rparm_list {*} &rparm_list ;
          array dparm_list {*} &dparm_list ;
          do i = 1 to dim(parm_list) ;
            parm_list{i} = . ;
            rparm_list{i} = '' ;
            dparm_list{i} = scan("&detect_list",i,' ') ;
          end ; 
        end ;

        /* Insert any qualifiers for keeping records here: */
/*
        if medium_cd = '9' or 
           medium_cd = '' or 
           hyd_event_cd = '9' or
           hyd_event_cd = '' or 
           samp_type_cd = '9' or 
           samp_type_cd = '' or
           (remark_cd ^= 'M' and remark_cd ^= 'N' and remark_cd ^= 'V' and remark_cd ^= 'U') then do ;
*/
          /* Parse out the WQ data into the proper parameter variable */
          %do i_parm = 1 %to %count(&parm_list) ;
            %let parm = %scan(&parm_list,&i_parm,' ') ;
            %let rparm = %scan(&rparm_list,&i_parm,' ') ;
            %let dparm = %scan(&dparm_list,&i_parm,' ') ;
            if parameter_cd = substr("&parm",2) then do ;
              contains_data = 1 ;
              &parm = result_va ;
              &rparm = remark_cd ;

/* For nondetect observations, use LowerQuantLimit (if available) to populate Detection Limit (if it's missing)  */
 if remark_cd = 'U' and DetectionLimit = '.' and LowerQuantLimit > 0.0 then DetectionLimit = LowerQuantLimit ;  
 if remark_cd = '<' and DetectionLimit = '.' and LowerQuantLimit > 0.0 then DetectionLimit = LowerQuantLimit ;

 /* If result_va contains a negative number (presumably it's the inverse of the detection limit), use it to
  populate the detection limit.  These cases were coded as 'U'in Part I */
 if result_va < 0.0 and DetectionLimit = '.' then DetectionLimit = -1.0 * result_va ;

 /* This statement applies to the nondetect (or present below quantification) results in the preprocessed modernized STORET data set - 
   nondetect coded with remark_cd = U or <, result_va field contains no value, a dummy zero value (rare), or a negative
   value which may indicate the inverse of the detection limit (rare).   */
              if remark_cd = 'U' and DetectionLimit > 0.0 then %scan(&parm_list,&i_parm,' ') = DetectionLimit ; /* This will also overwrite the zero and negative cases with best estimate of detection limit*/
              if remark_cd = 'U' and DetectionLimit > 0.0 then %scan(&dparm_list,&i_parm,' ') = DetectionLimit ;
              if remark_cd = '<' and DetectionLimit > 0.0 then %scan(&parm_list,&i_parm,' ') = DetectionLimit ;  /* This will also overwrite the zero and negative cases with the best estimate of detection limit*/
              if remark_cd = '<' and DetectionLimit > 0.0 then %scan(&dparm_list,&i_parm,' ') = DetectionLimit ;

			  end;
          %end ;
*        end ;
      end ;
      date = indate ;
      if length(trim(left(intime))) = 4 then time = input(substr(intime,1,2) || ':' || substr(intime,3),time5.) ;
      else time = input(substr(intime,1,1) || ':' || substr(intime,2),time5.) ;
      station_id = instation_id ;
      agency = inagency ;
    run ;

    proc append base = dir.&SAS_output_name data = input_data ;
    run ;

  %end ;

%mend ;

%set_wq_data_all ;