Skip to Main Content

Sample SAS Program

  /*================================================================*
 *This SAS program is to analyze twin data. It has three sections.*
 *The first section reads the data. You need to specify your own  *
 *path for the data set and the variable names to be input. This  *
 *is also where you create your new variables.                    *
 *The second section should not be changed. This section creates  *
 *random effects to accommodate different correlations of MZ and  *
 *DZ twins.                                                       *
 *The third section executes different models. You need to include*
 *appropriate variables for your analysis as well as try different*
 *mode of inheritance and initial parameter values.               *
 *Please email heping.zhang@yale.edu if you discovered any errors *
 *with this program. This program can be freely used and          *
 *distributed. However, the owners of this program is not         *
 *responsible for inaccuracy or errors as a result of using this  *
 *program. It is the responsibility of the users to verify the    *
 *the validity of this program and conduct their analysis         * 
 *appropriately.                                                  *
 *================================================================*/

/*Begin Section 1*/
data one;
*you need to enter your own file path in the next statement;
INFILE 'Z:\yj56\twin studies\080312P\biometrics-simb.txt' missover delimiter='09'x  firstobs=2;
*you need to include appropriate variables the next statement;
input FAMID     BPD     SEX X1 ZYGO ID;
run;

/*End Section 1*/
/*Begin Section 2: no changes are needed in this section*/
proc iml;
covA1=sqrsym({1,1,1});
gA1=t(root(covA1));
covA2=sqrsym({1,0.5,1});
gA2=t(root(covA2));
covD1=sqrsym({1,1,1});
gD1=t(root(covD1));
covD2=sqrsym({1,0.25,1});
gD2=t(root(covD2));

/* for additive genetic effect, the G design matrix determines the model through
the model covariance structure: VAR[A]=GZG`. The mixed model is then represented
as: logit(p{y=1}) = mu + ZG + e */

g1=gA1||gD1;                    /*combine matrix for MZ twin*/
g2=gA2||gD2;                    /*combine matrix for DZ twin*/
use one;
        read all var {FAMID zygo} into vzygo;
close one;

nsub=nrow(vzygo);
g=j(nsub,4,.);
do i=1 to nsub;
        ind=2-mod(i,2);
        if(vzygo[i,2]=1) then g[i,]=g1[ind,];                   /* MZ twin*/
        if(vzygo[i,2]=0) then g[i,]=g2[ind,];                   /* DZ twin*/
end;

cname={"aone" "atwo" "done" "dtwo"};
create rmatrix from g[colname=cname];
append from g;
close rmatrix;
quit;

data rmatrix;
set rmatrix;
id = _n_;
run;

data two;
merge one rmatrix;
by id;
run;

/*End Section 2*/

/*Begin Section 3*/
title1 'ADE model';
proc nlmixed data=two;
*you need to choose your own initial values;
*it may be useful to run a simple model such as PROC LOGISTIC;
*usually you may need to try-and-err;
parms beta1 = 0.1, beta2=-0.79, s1=0.1, s3=0.1;
*include the fixed covariates in the next statement;
fixed1 = beta0+beta1*SEX+beta2*X1;
*the following statement specifies additive (a1 and a2) and;
*common shared (c) random effects;
random1 = a1*aone+a2*atwo+c;
eta = fixed1 + random1;
*specifies a probit model;
p = probnorm(eta); 
model BPD ~ binary(p);
random a1 a2 c ~ normal([0,0,0],[s1,0,s1,0,0,s3]) subject=FAMID; 
estimate 'heritability' s1/(1+s1+s3);
run;
title2 'ACE model';
proc nlmixed data=two;
parms beta1 = 0.1, beta2=-0.79, s1=0.1, s2=0.1;;
fixed1 = beta0+beta1*SEX+beta2*X1;
*the following statement specifies additive (a1 and a2) and;
*dominant (d1 and d2) random effects;
random1 = a1*aone+a2*atwo+d1*done+d2*dtwo;
eta = fixed1 + random1;
p = 1 - probnorm(eta); 
model BPD ~ binary(p);
random a1 a2 d1 d2 ~ normal([0,0,0,0],[s1,0,s1,0,0,s2,0,0,0,s2]) subject=FAMID; 
estimate 'heritability' (s1+s2)/(1+s1+s2);
run;
/*End Section 3*/