/** Make Models.SAS
**    Clint Moore, USGS
**    25 August 2011
**
**  Reads data from elicitation spreadsheets and prepares it for processing.
**    Saturated models are fit in GLM (native prairie responses) and CATMOD
**    (dominance category responses).  Parameter estimates are exported to
**    IML for simulated draws of NP and defoliation control variables within
**    respective categories.  Simulation output are summarized into transition
**    probabilities, by multiplying marginal NP and dominance category
**    probabilities.  Transition probability matrix is written to a spreadsheet,
**    and written to a formatted text file for inclusion in ASDP script.
**
**  Although this program has been used by the U.S. Geological Survey (USGS),
**    no warranty, expressed or implied, is made by the USGS or the
**    U.S. Government as to the accuracy and functioning of the program and
**    related program material nor shall the fact of distribution constitute
**    any such warranty, and no responsibility is assumed by the USGS in
**    connection therewith.
**/

** Root path -- other path statements below branch off this one **;
%let rootpath = c:\projects\rcrp\np\competing models\2011 effort\tall_23Aug;
%*let rootpath = c:\projects\rcrp\np\competing models\2011 effort\mixed_final;

** Select a model type **;
** 3: All treatments and defoliation treated linearly (e.g., models 3-6);
** 2: All treatments, 1 defoliation class -- "Past does not matter" (e.g., model 2);
** 1: Rest vs Active, 2 defoliation classes -- "Regular defoliation cycle" (e.g., model 1);
%let mtype = 1;

** Select a grass type **;
** 1: Mixed, 2: Tall **;
%let gtype = 2;

/**** Elicitation spreadsheet parameters ***/
    ** Subpath for location of elicitation spreadsheet **;
    %let elicitpath = elicitation 2011\;

    ** File name of spreadsheet **;
    %let elicitfile = competing models - tall.xls;

    ** Worksheet to be read **;
    %let elicitsheet = tall_m1;

    ** Amount (divisible into 100) by which to divide each NP and dominance response **;
    %let factor = 5;
    %let basis = %sysevalf(100/&factor);

    ** Regression points for NP data **;
    %let lowNP = 0.30;
    %let highNP = 0.70;
    %let xindex = %sysevalf(&lowNP-0.5), %sysevalf(&highNP-0.5);

    ** Regression points for defoliation data **;
    %let defindex = -1, 1;

/**** Parameters for estimation model execution ***/
    ** Sample sizes for native prairie and dominance portions of elicitation **;
    %let np_n = 20;
    %let inv_n = 20;

    ** Number of simulations for construction of transition probabilities **;
    %let nsims = 100000;

    ** Random number seed **;
    %let rseed = 54321;

/**** Parameters for exporting transition probabilities ***/
    ** Subpath for location of model spreadsheet **;
    %let modelpath =;

    ** File name of model spreadsheet **;
    %let modelfile = trans_probs.xls;

    ** Worksheet to write to **;
    %*let modelsheet = &elicitsheet._%sysfunc(int(%sysfunc(time())));
    %let modelsheet = &elicitsheet;

/**** Parameters for writing ASDP file ***/
    ** Subpath for location of output file **;
    %let ASDPpath = make ASDP files\;

    ** File name string for model **;
    %let ASDPfile = &elicitsheet;

%let trtm1 = "Graze";  %let trtm2 = "Burn";  %let trtm3 = "B/G";
%let trtt1 = "GrazeW"; %let trtt2 = "BurnW"; %let trtt3 = "Defol";

%macro readelicit;
  ** This macro reads the elicitation spreadsheet for a single model and creates data
  **  for input into the GLM and CATMOD routines **;

  ** Depending on model type, select appropriate spreadsheet columns to read **;
  %if &mtype=3 %then %let dostring = 1 to 32;
  %if &mtype=2 %then %let dostring = 1 to 16;
  %if &mtype=1 %then
    %do;
      %*if &gtype=1 %then %let dostring = 1,2,5,6,9,10,13,14,17,18,21,22,25,26,29,30;
      %*if &gtype=2 %then %let dostring = 1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31;
      %let dostring = 1,2,5,6,9,10,13,14,17,18,21,22,25,26,29,30;
    %end;


  ** Read in values from indicated elicitation worksheet and range (Excel 2003 format) **;
  proc import
      datafile="&rootpath\&elicitpath.&elicitfile"
      dbms="EXCEL" out=a2 replace;
    range = "&elicitsheet$e7:bo19"n;
    getnames = NO;
    run;

  proc import
      datafile="&rootpath\&elicitpath.&elicitfile"
      dbms="EXCEL" out=a1 replace;
    range = "&elicitsheet$e24:bo36"n;
    getnames = NO;
    run;

  proc import
      datafile="&rootpath\&elicitpath.&elicitfile"
      dbms="EXCEL" out=b replace;
    range = "&elicitsheet$e47:bo51"n;
    getnames = NO;
    run;

  ** Each range contained a row of zeros to force reading columns correctly -- erase these first rows **;
  data a1;  set a1;  if _n_=1 then delete;  run;
  data a2;  set a2;  if _n_=1 then delete;  run;
  data b;  set b;  if _n_=1 then delete;  run;

  ** This step reads in the elicitation data for the lower NP section **;
  data a1 (keep=nplevel dlevel vlevel tlevel p);
    set a1;
    array col[63] f1-f63;   ** Array elements correspond to columns of worksheet **;
    array tx[32] _temporary_ (32*1);
    ** Class assignments for defoliation levels, invader levels, and treatment levels **;
    array classes[3,32] _temporary_ (1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
                                     2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2,
                                     1 1 1 1 2 2 2 2 3 3 3 3 4 4 4 4
                                     1 1 1 1 2 2 2 2 3 3 3 3 4 4 4 4,
                                     1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4
                                     1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4);
    ** p decreases in steps of 0.05 with each data row **;
    p = (&lowNP+0.35) - (_n_-1)/20 + 0.025;
    do i=&dostring;
      ** These steps count up the number of times the NP response falls into each bin **;
      j = (i-1)*2+1;
      t0 = tx[i];
      ** Found a non-zero data element **;
      if col[j]>0 then
        do t=t0 to t0+(col[j]/&factor)-1;
          nplevel = 1;
          dlevel = classes[1,i];
          vlevel = classes[2,i];
          tlevel = classes[3,i];
          tx[i] = t+1;
          output;
        end;
    end;
    run;

  ** This step reads in the elicitation data for the upper NP section **;
  data a2 (keep=nplevel dlevel vlevel tlevel p);
    set a2;
    array col[63] f1-f63;   ** Array elements correspond to columns of worksheet **;
    array tx[32] _temporary_ (32*1);
    ** Class assignments for defoliation levels, invader levels, and treatment levels **;
    array classes[3,32] _temporary_ (1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
                                     2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2,
                                     1 1 1 1 2 2 2 2 3 3 3 3 4 4 4 4
                                     1 1 1 1 2 2 2 2 3 3 3 3 4 4 4 4,
                                     1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4
                                     1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4);
    ** p decreases in steps of 0.05 with each data row **;
    p = (&highNP+0.15) - (_n_-1)/20 + 0.025;
    do i=&dostring;
      ** These steps count up the number of times the NP response falls into each bin **;
      j = (i-1)*2+1;
      t0 = tx[i];
      ** Found a non-zero data element **;
      if col[j]>0 then
        do t=t0 to t0+(col[j]/&factor)-1;
          nplevel = 2;
          dlevel = classes[1,i];
          vlevel = classes[2,i];
          tlevel = classes[3,i];
          tx[i] = t+1;
          output;
        end;
    end;
    run;

  ** This step merges the 2 NP files together and calculates the empirical logits for GLM **;
  data p;
    set a1 a2;
    array z[2] _temporary_ (&xindex);
    array def[2] _temporary_ (&defindex);
    x = z[nplevel];
    d = def[dlevel];
    l = log(p/(1-p));
    run;

  ** Process the invader elicitation input **;
  data q (keep=dlevel d vlevel tlevel q count);
    set b;
    array col[63] f1-f63;   ** Array elements correspond to columns of worksheet **;
    array def[2] _temporary_ (&defindex);
    array qlabel[4] $ _temporary_ ('1-SB' '2-Co' '3-KB' '4-RM');
    ** Class assignments for defoliation levels, invader levels, and treatment levels **;
    array classes[3,32] _temporary_ (1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
                                     2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2,
                                     1 1 1 1 2 2 2 2 3 3 3 3 4 4 4 4
                                     1 1 1 1 2 2 2 2 3 3 3 3 4 4 4 4,
                                     1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4
                                     1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4);
    q = qlabel[_n_];
    do i=&dostring;
      j = (i-1)*2+1;
      count = max(col[j],0) / &factor;
      dlevel = classes[1,i];
      vlevel = classes[2,i];
      tlevel = classes[3,i];
      d = def[dlevel];
      output;
    end;
    run;

%mend;

%readelicit


** The following data steps re-create individual defoliation index values and
**   compute their frequency of assignment into defoliation classes (for use
**   in drawing simulated values later on)  **;
data def;
  array new[7] _temporary_;
  ** Make a temporary array of coefficients for logistic defoliation index **;
  array c[8] _temporary_ (0.952574127, 0.880797078, 0.731058579, 0.5,
                          0.268941421, 0.119202922, 0.047425873, 0.01798621);
  ** Center and scaling for these coefficients **;
  array sc[2] _temporary_ (3.36, 2.36);
  ** There are 128 = 2^7 unique defoliation histories -- calculate each one **;
  do case = 1 to 128;
    remainder = case;
    do i=1 to 7;
      ** Divide by the current power of 2, then take the integer part **;
      new[i] = int(remainder/2**(7-i));
      ** Calculate the remainder for this division operation **;
      remainder = remainder - new[i]*(2**(7-i));
    end;
    ** Calculate years since (ys) and number of defoliations (nd) for each history **;
    ys = 0;
    foundit = 0;
    do until(foundit>0 | ys=7);
      ys+1;
      if new[ys]=1 then foundit = 1;
    end;
    if foundit=0 then ys = 8;
    nd = new[1]+new[2]+new[3]+new[4]+new[5]+new[6]+new[7];
    ** Compute defoliation index given ys and nd **;
    dx = c[ys]*nd;
    ** Assign each index to a class **;
    if dx<2 then class=1;
    else if dx>4 then class=3;
    else class=2;
    ** Center and scale each index **;
    dx = (dx - sc[1])/sc[2];
    output;
  end;
  run;

** The next steps compute the frequencies of histories in each class **;
proc summary data=def n;
  class class dx;
  var nd;
  output out=def n=count;
  run;

data def2(keep=_FREQ_);
  set def(where=(_TYPE_=2));
  run;

data def(drop=_TYPE_ _FREQ_);
  set def(where=(_TYPE_=3) drop=_FREQ_);
  retain groupcount;
  do i=1 to 3;
    set def2 point=i;
    if class=i then groupcount=_FREQ_;
  end;
  ** Sampling probability for the defoliation class **;
  prob = count/groupcount;
  run;


%macro np_model;
  ** This macro fits the NP data to a multi-way (saturated) linear model,
  **  draws samples from the NP and defoliation classes, computes the predicted
  **  NP outcome, and computes transition frequencies **;

  %if &mtype=3 %then %do;
    ** For this model class, defoliation is treated as a continuous predictor **;
    %let dclass =;
    %let dfactor = d|;
    %let msedenom = n*maxt*maxv*4 - maxt*maxv*4;
  %end;
  %if &mtype=2 %then %do;
    ** For this model class, defoliation is not used in the model **;
    %let dclass =;
    %let dfactor =;
    %let msedenom = n*maxt*maxv*2 - maxt*maxv*2;
  %end;
  %if &mtype=1 %then %do;
    ** For this model class, defoliation is used as a class variable **;
    %let dclass = d;
    %let dfactor = d|;
    %let msedenom = n*2*maxv*4 - 2*maxv*4;
  %end;

  **  GLM for NP model **;
  ods select none;
  proc glm data=p;
    class &dclass vlevel tlevel;
    model l = x | &dfactor vlevel | tlevel / predicted solution inverse;
    ods output ParameterEstimates=c InvXPX=d;
    run;
  ods select all;

  ** Simulation for NP model  **;
  proc iml;
    ** Read in parameter vector, SSE information, and COVB matrix **;
    use d;
    read all var{l} into parms where(Parameter^="l");
    read all var{l} into sse where(Parameter="l");
    read all into inv where(Parameter^="l");
    ** Read in defoliation class sampling probabilities **;
    use def;
    read all var{class dx prob} into dx;

    inv = inv[,1:nrow(inv)];
    nsims = &nsims;
    n = &np_n;
    ** Number of treatment and defoliation classes depend on model type **;
    maxt = 4*(&mtype>1) + 2*(&mtype=1);
    maxv = 4;
    maxd = 3*(&mtype>1) + 2*(&mtype=1);
    maxnp = 4;
    ** Centered NP categories **;
    npcats = {0.00, 0.30, 0.45, 0.60, 1.00};
    npbins = npcats[1:4] || npcats[2:5];
    npcenter = 0.5;
    npcats = npcats-npcenter;
    mse = sse / (&msedenom);

    ran1 = ranuni(j(nsims,maxt*maxv*maxd*(1+maxnp),&rseed));
    ran2 = rannor(j(nsims,maxt*maxv*maxd*maxnp,&rseed));
    results = j(maxt*maxv*maxd*maxnp,8,0);
    lookup1 = 0;
    lookup2 = 0;

    ** Loop over treatments, then invader classes, then defoliation classes, then NP classes **;
    do t = 1 to maxt;
      ** Sets up the treatment dummy vector **;
      dt = j(1,maxt,0);
      dt[t] = 1;
      do v0 = 1 to maxv;
        ** Sets up the invader class dummy vector **;
        dv = j(1,maxv,0);
        dv[v0] = 1;
        dvdt = dv @ dt;   ** Invader * Treatment interaction **;
        do di = 1 to maxd;
          lookup1 = lookup1+1;
          lookupd = lookup1;
          ** Which defoliation class to sample from? **;
          dcases = dx[loc(dx[,1]=di),{2 3}];
          dcases = dcases || cusum(dcases[,2]);
          ** Sets up a defoliation class variable (and interactions) for model type 1 **;
          dd = j(1,maxd,0);
          dd[di] = 1;
          dddt = dd @ dt;
          dddv = dd @ dv;
          dddvdt = dddv @ dt;
          do np = 1 to maxnp;
            lookup1 = lookup1+1;
            lookup2 = lookup2+1;
            ** For model type 2, only go through the following steps once **;
            if &mtype^=2 | (&mtype=2 & di=1) then
              do;
                np1 = j(1,maxnp,0);
                ** Start the sampling loop **;
                do s = 1 to nsims;
                  ** For model type 3, draw a random defoliator predictor
                  **  from the appropriate class and set up interactions **;
                  if np=1 & &mtype=3 then
                    do;
                      d = dcases[(loc(ran1[s,lookupd]<dcases[,3])[<:>]),1];
                      ddv = d*dv;
                      ddt = d*dt;
                      ddvdt = d*dvdt;
                    end;
                  ** Draw a random NP value from the appropriate class **;
                  x = npcats[np] + ran1[s,lookup1]*(npcats[np+1] - npcats[np]);
                  ** Assemble the predictor vector, based on model type **;
                  if &mtype=2 then
                      xvec = 1 || x || dv || x*dv || dt || x*dt || dvdt || x*dvdt;
                  else if &mtype=1 then
                      xvec = 1 || x || dd || x*dd || dv || x*dv || dddv || x*dddv || dt ||
                             x*dt || dddt || x*dddt || dvdt || x*dvdt || dddvdt || x*dddvdt;
                  else
                      xvec = 1 || x || d || x*d || dv || x*dv || ddv || x*ddv || dt ||
                             x*dt || ddt || x*ddt || dvdt || x*dvdt || ddvdt || x*ddvdt;
                  ** Prediction of logit NP and mean probability **;
                  y = xvec * parms;
                  pmean = 1 / (1 + exp(-y) );
                  ** SE of the prediction **;
                  lstdi = sqrt((1 + xvec * inv * xvec`)*mse);
                  ** Draw a random predicted value **;
                  draw = y + ran2[s,lookup2]*lstdi;
                  ** Convert to probability **;
                  pdraw = 1 / (1 + exp(-draw) );
                  ** Determine which NP probability class this prediction belongs **;
                  npbin = loc( (pdraw-npbins[,1])#(pdraw-npbins[,2]) < 0);
                  ** Add to the mean accumulator  **;
                  np1[npbin] = np1[npbin]+1/nsims;
                end;
              end;
            ** Put this simulation average in results matrix **;
            results[lookup2,] = (t || v0 || di || np || np1);
          end;
          ** For model type 2, copy the values from the first level into the remaining ones **;
          if &mtype=2 & di>1 then
            do;
              rowstart0 = lookup2-(maxnp-1)-maxnp*(di-1);
              rowend0 = lookup2-maxnp*(di-1);
              rowstart = lookup2-(maxnp-1);
              results[rowstart:lookup2,5:8] = results[rowstart0:rowend0,5:8];
            end;
        end;
      end;
    end;

    names = {'trt','invader','D class','NP class','NP1 class'};
    create np var {trt inv0 defidx np0 np1_1 np1_2 np1_3 np1_4};
    append from results;

    quit;

  ** For model type 1, this step is needed to copy results into other levels of
  **  the treatment and defoliation classes **;
  data np (drop=trttest deftest);
    set np;
    retain trt inv0 defidx np0 np1_1-np1_4;
    ** (Old version) For tallgrass, "defoliation" constitutes treatments 3 (BurnW) and 4 (Defoliate) **;
    ** if &mtype=1 & &gtype=2 & trt=2 then trt = 3;
    output;
    if &mtype=1 then
      do;
        trttest = trt;
        deftest = defidx;
        if trttest=2 then
          do;
            trt = 3;
            output;
            trt = 4;
            output;
            trt = 2;
          end;
        if deftest=2 then
          do;
            defidx = 3;
            output;
            defidx = 2;
          end;
        if trttest=2 & deftest=2 then
          do;
            trt = 3;
            defidx = 3;
            output;
            trt = 4;
            output;
            trt = 2;
            defidx = 2;
          end;
      end;
    /******** Old version for GrazeW=Rest
    if &mtype=1 then
      do;
        trttest = trt;
        deftest = defidx;
        if &gtype=1 & trttest=2 then
          do;
            trt = 3;
            output;
            trt = 4;
            output;
            trt = trttest;
          end;
        if &gtype=2 & (trttest=1 | trttest=3) then
          do;
            trt = trttest+1;
            output;
            trt = trttest;
          end;
        if deftest=2 then
          do;
            defidx = 3;
            output;
            defidx = 2;
          end;
        if &gtype=1 & trttest=2 & deftest=2 then
          do;
            trt = 3;
            defidx = 3;
            output;
            trt = 4;
            output;
            trt = trttest;
            defidx = 2;
          end;
        if &gtype=2 & (trttest=1 | trttest=3) & deftest=2 then
          do;
            trt = trttest+1;
            defidx = 3;
            output;
            trt = trttest;
            defidx = 2;
          end;
      end;
      ********/
    run;

  proc sort data=np;  by trt inv0 defidx;  run;

%mend;
%np_model

%macro inv_model;
  ** This macro fits the dominance data to a multi-way (saturated) logit model,
  **  draws samples from the defoliation classes, computes the predicted
  **  dominance outcome, and computes transition frequencies **;

  %if &mtype=3 %then %do;
    ** For this model class, defoliation is treated as a continuous predictor **;
    %let ddirect = d;
    %let dfactor = d|;
  %end;
  %if &mtype=2 %then %do;
    ** For this model class, defoliation is not used in the model **;
    %let ddirect =;
    %let dfactor =;
  %end;
  %if &mtype=1 %then %do;
    ** For this model class, defoliation is used as a class variable **;
    %let ddirect =;
    %let dfactor = d|;
  %end;

  ** Logit model for dominance response **;
  proc catmod data=q noprint;
    weight count;
    direct &ddirect;
    response / out=b outest=d;
    model q = &dfactor vlevel | tlevel / pred=prob profile ml;
    run;

  ** Simulation for dominance model  **;
  proc iml;
    ** Read in parameter vector and COVB matrix **;
    use d;
    read all into parms where(_TYPE_="PARMS");
    read all into covb where(_TYPE_^="PARMS");
    ** Read in defoliation class sampling probabilities **;
    use def;
    read all var{class dx prob} into dx;

    n = &inv_n;
    nsims = &nsims;
    ** Number of treatment and defoliation classes depend on model type **;
    maxt = 4*(&mtype>1) + 2*(&mtype=1);
    maxv = 4;
    maxd = 3*(&mtype>1) + 2*(&mtype=1);

    ran1 = ranuni(j(nsims,maxt*maxv*maxd,&rseed));
    results = j(maxt*maxv*maxd,7,0);
    lookup1 = 0;

    ** Loop over treatments, then invader classes, then defoliation classes **;
    do t = 1 to maxt;
      ** Sets up the treatment dummy vector (note coding for last level) **;
      dt = j(1,maxt-1,0);
      if t<maxt then dt[t] = 1;
      else dt = j(1,maxt-1,-1);
      do v0 = 1 to maxv;
        ** Sets up the invader class dummy vector **;
        dv = j(1,maxv-1,0);
        if v0<maxv then dv[v0] = 1;
        else dv = j(1,maxv-1,-1);
        dvdt = dv @ dt;      ** Invader * Treatment interaction **;
        do di = 1 to maxd;
          lookup1 = lookup1+1;
          ** Which defoliation class to sample from? **;
          dcases = dx[loc(dx[,1]=di),{2 3}];
          dcases = dcases || cusum(dcases[,2]);
          ** Sets up a defoliation class variable (and interactions) for model type 1 **;
          dd = j(1,maxd-1,0);
          if di<maxd then dd[di] = 1;
          else dd = j(1,maxd-1,-1);
          dddt = dd @ dt;
          dddv = dd @ dv;
          dddvdt = dddv @ dt;
          ** For model type 2, only go through the following steps once **;
          if &mtype^=2 | (&mtype=2 & di=1) then
            do;
              v1 = j(1,maxv,0);
              ** Start the sampling loop **;
              do s = 1 to nsims;
                ** For model type 3, draw a random defoliator predictor
                **  from the appropriate class and set up interactions **;
                if &mtype=3 then
                  do;
                    d = dcases[(loc(ran1[s,lookup1]<dcases[,3])[<:>]),1];
                    ddv = d*dv;
                    ddt = d*dt;
                    ddvdt = d*dvdt;
                  end;
                pmean = j(1,maxv,0);
                pdraw = j(1,maxv,0);
                xvec = j(maxv-1,ncol(parms),0);
                ** Loop over the response variables **;
                do j=1 to maxv-1;
                  ** Sets up dummy variable coding for the response variable **;
                  dr = j(1,maxv-1,0);
                  dr[j] = 1;
                  ** Assemble the predictor vector, based on model type **;
                  if &mtype=2 then xvec[j,] = dr || dv@dr || dt@dr || dvdt@dr;
                  else if &mtype=1 then
                    xvec[j,] = dr || dd@dr || dv@dr || dddv@dr || dt@dr || dddt@dr || dvdt@dr || dddvdt@dr;
                  else
                    xvec[j,] = dr || d*dr || dv@dr || ddv@dr || dt@dr || ddt@dr || dvdt@dr || ddvdt@dr;
                end;
                ** Prediction of logit responses and mean probability vector **;
                y = xvec * parms`;
                expy = exp(y`);
                pmean[1:maxv-1] = expy / (1+expy[,+]);
                pmean[maxv] = 1 / (1+expy[,+]);
                ** An approximate method for drawing random samples from mean probabilities **;
                do j=1 to maxv;
                  ** Beta random variables (indexed by sample size) **;
                  y1 = rangam(&rseed,pmean[j]*(&inv_n-1));
                  y2 = rangam(&rseed,(1-pmean[j])*(&inv_n-1));
                  pdraw[j] = y1 / (y1+y2);
                end;
                ** Normalize to 1.0 sum **;
                pdraw = pdraw / pdraw[,+];
                ** Add to the mean accumulator  **;
                v1 = v1 + pdraw/nsims;
              end;
              ** Put this simulation average in results matrix **;
              results[lookup1,] = (t || v0 || di || v1);
            end;
          ** For model type 2, copy the values from the first level into the remaining ones **;
          if &mtype=2 & di>1 then
            do;
              rowstart0 = lookup1-(di-1);
              results[lookup1,] = (t || v0 || di || results[rowstart0,4:7]);
            end;
        end;
      end;
    end;

    names = {'trt','invader','D class','NP1 class'};

    create inv var {trt inv0 defidx inv1_1 inv1_2 inv1_3 inv1_4};
    append from results;

    quit;

  ** For model type 1, this step is needed to copy results into other levels of
  **  the treatment and defoliation classes **;
  data inv (drop=trttest deftest);
    set inv;
    retain trt inv0 defidx inv1_1-inv1_4;
    ** (Old version) For tallgrass, "defoliation" constitutes treatments 3 (BurnW) and 4 (Defoliate) **;
    ** if &mtype=1 & &gtype=2 & trt=2 then trt = 3;
    output;
    if &mtype=1 then
      do;
        trttest = trt;
        deftest = defidx;
        if trttest=2 then
          do;
            trt = 3;
            output;
            trt = 4;
            output;
            trt = 2;
          end;
        if deftest=2 then
          do;
            defidx = 3;
            output;
            defidx = 2;
          end;
        if trttest=2 & deftest=2 then
          do;
            trt = 3;
            defidx = 3;
            output;
            trt = 4;
            output;
            trt = 2;
            defidx = 2;
          end;
      end;
    /******** Old version for GrazeW=Rest
    if &mtype=1 then
      do;
        trttest = trt;
        deftest = defidx;
        if &gtype=1 & trttest=2 then
          do;
            trt = 3;
            output;
            trt = 4;
            output;
            trt = trttest;
          end;
        if &gtype=2 & (trttest=1 | trttest=3) then
          do;
            trt = trttest+1;
            output;
            trt = trttest;
          end;
        if deftest=2 then
          do;
            defidx = 3;
            output;
            defidx = 2;
          end;
        if &gtype=1 & trttest=2 & deftest=2 then
          do;
            trt = 3;
            defidx = 3;
            output;
            trt = 4;
            output;
            trt = trttest;
            defidx = 2;
          end;
        if &gtype=2 & (trttest=1 | trttest=3) & deftest=2 then
          do;
            trt = trttest+1;
            defidx = 3;
            output;
            trt = trttest;
            defidx = 2;
          end;
      end;
      *********/
    run;

  proc sort data=inv;  by trt inv0 defidx;  run;

%mend;

%inv_model

%macro transprob;
  ** This macro merges the NP and INV files, computes the joint probability of
  **   NP class x Invader outcome, and writes results to spreadsheet **;

  ** Select appropriate treatment labels for grass type **;
  %if &gtype=1 %then %let fmtstring = 1="Rest" 2=&trtm1 3=&trtm2 4=&trtm3;
  %else %let fmtstring = 1="Rest" 2=&trtt1 3=&trtt2 4=&trtt3;

  proc format;
    value invfmt 1='SB' 2='Co' 3='KB' 4='RM';
    value npfmt 1='0-30' 2='30-45' 3='45-60' 4='60-100';
    value defolfmt 1='<2' 2='2-4' 3='>4';
    value trtfmt &fmtstring;
    run;

  ** Merge the 2 simulation files and compute transition probability **;
  data trprob;
    merge inv np;
    by trt inv0 defidx;
    length trtmnt $ 6 dominv $ 2 defolx $3 npclass $ 6;
    array np1[4] np1_1-np1_4;
    array inv1[4] inv1_1-inv1_4;
    array state1[16] np1sb np1co np1kb np1rm np2sb np2co np2kb np2rm
                     np3sb np3co np3kb np3rm np4sb np4co np4kb np4rm;
    sum = 0;
    ** NP states -- must reverse ordering (to read from high to low) **;
    do i=4 to 1 by -1;
      ** Invader states  **;
      do j=1 to 4;
        ** Joint probability of membership in NP state i and invader state j **;
        state1[(i-1)*4+j] = np1[i]*inv1[j];
        sum = sum+state1[(i-1)*4+j];
      end;
    end;
    trtmnt = put(trt,trtfmt.);
    dominv = put(inv0,invfmt.);
    npclass = put(np0,npfmt.);
    defolx = put(defidx,defolfmt.);
    run;

  proc sort data=trprob;
    by defidx trt descending np0 inv0;
    run;

  /**
  proc print data=trprob;
    var sum defidx trt np0 inv0 inv1_1-inv1_4 np1_1-np1_4
        np4sb np4co np4kb np4rm np3sb np3co np3kb np3rm
        np2sb np2co np2kb np2rm np1sb np1co np1kb np1rm;
    format trt trtfmt. np0 npfmt. inv0 invfmt. defidx defolfmt.;
    run;

  proc print data=trprob;
    var defidx trt np0 inv0
        np4sb np4co np4kb np4rm np3sb np3co np3kb np3rm
        np2sb np2co np2kb np2rm np1sb np1co np1kb np1rm;
    format trt trtfmt. np0 npfmt. inv0 invfmt. defidx defolfmt.
        np4sb np4co np4kb np4rm np3sb np3co np3kb np3rm
        np2sb np2co np2kb np2rm np1sb np1co np1kb np1rm 5.3;
    run;
  **/

  ** Prepare data for output to spreadsheet **;
  data trprobx;
    format defolx trtmnt npclass dominv
           np4sb np4co np4kb np4rm np3sb np3co np3kb np3rm
           np2sb np2co np2kb np2rm np1sb np1co np1kb np1rm;
    set trprob;
    run;

  ** Export the data to Excel **;
  proc export data=trprobx (keep=defolx trtmnt npclass dominv
                                 np4sb np4co np4kb np4rm np3sb np3co np3kb np3rm
                                 np2sb np2co np2kb np2rm np1sb np1co np1kb np1rm)
        outfile="&rootpath\&modelpath.&modelfile" dbms=excel replace;
    sheet = "&modelsheet";
    run;

%mend;

%transprob

%macro toASDP;
  ** This macro reads the transition probability matrix from the spreadsheet and
  **  formats it for use in ASDP **;

  ** Select appropriate treatment labels for grass type **;
  %if &gtype=1 %then %let trts = "Rest" &trtm1 &trtm2 &trtm3;
  %else %let trts = "Rest" &trtt1 &trtt2 &trtt3;

  ** Read data from Excel file **;
  proc import out=work.a datafile="&rootpath\&modelpath.&modelfile" dbms=excel replace;
    sheet="&modelsheet";
    getnames=yes;
    mixed=yes;
    scantext=yes;
    usedate=yes;
    scantime=yes;
    run;

  data a (keep=sheet defolx def trtmnt trt state1-state16 c1-c16 row);
    set a;
    length sheet $ 40;
    sheet = "&modelsheet";
    row = _n_;
    array trtvec{4} $ _temporary_ (&trts);
    do i=1 to 4;
      if trtmnt=trtvec[i] then trt = i-1;
    end;
    if defolx="<2" then def = 0;
    if defolx="2-4" then def = 1;
    if defolx=">4" then def = 2;
    array pvec{16} np4sb np4co np4kb np4rm np3sb np3co np3kb np3rm
                   np2sb np2co np2kb np2rm np1sb np1co np1kb np1rm;
    array s{16} state1-state16;
    array cvec{16} c1-c16;
    ** Calculate cumulative probabilities across each row **;
    do i=1 to 16;
      s[i] = pvec[i];
      if i=1 then cvec[i] = pvec[i];
      else cvec[i] = cvec[i-1] + pvec[i];
      ** Make the last cumulative value >1 (assures ASDP draws from this last category) **;
      if i=16 then cvec[i] = 1.1;
    end;
    run;

  proc sort data=a;
    by trt def row;
    run;

  filename out "&rootpath\&ASDPpath";

  ** Writes the matrix in C format for ASDP **;
  data _null_;
    set a end=lastone;
    by trt def;
    file out(sd_seg_&ASDPfile..txt) ls=140;
    if _n_=1 then put / @10 "/**  From worksheet '" sheet "'  **/"
                      / @10 "p[4][3][16][16] = " / @10 "{" @;
    if first.trt then put @12 "{" @;
    if first.def then put @14 "{" @;
    put @16 "{" @18 (c1-c16) (5.3 "," +1) @;
    if not lastone then
      do;
        if last.trt then put " } } }," /;
        else if last.def then put " } }," /;
        else put " },";
      end;
    else put @128 " } } } },";
    run;

%mend;

%toASDP

quit;