OPTIONS NOCENTER FORMCHAR = "|----|+|---+="; TITLE1 'Data From: R. Mermelstein NCI smoking cessation project'; TITLE2 'Looking at imputation of smoking variable - OR = 2'; /* looking at last timepoint and collapsing controls & no-shows vs two treatment groups */ /* performing simulation for the missing observations - OR=2 for MISSING & SMOKING */ /* putting in uncertainty attributable to BETAs */ /* USING PROC MIANALYZE for getting the MI results */ DATA one; INFILE 'c:\smoke.dat'; INPUT id smk miss smk0 grp; /* these variables are coded as follows: grp 0=tx 1=control smk (smoking at final endpoint - 24 month followup) 0=abstinent 1=smoking smk0 (smoking at post-intervention) 0=abstinent 1=smoking miss (missing at final endpoint) 0=observed 1=missing */ /* observed cell frequencies - these were obtained previous to this analysis */ n111 = 42; /* number of abstainers - smk0=abstinent */ n112 = 71; /* number of smokers - smk0=abstinent */ n211 = 36; /* number of abstainers - smk0=smoking */ n212 = 223; /* number of smokers - smk0=smoking */ /* using OR=2 for MISSING with SMOKING */ orat = 2; beta0m = LOG(n112/n111); beta1m = LOG(orat); beta2m = LOG(n212/n211); beta3m = LOG(orat); seed = 974677743; /* figure out cell frequencies for the missing cells */ /* these are based on the observed cell frequencies and the assumed odds ratio */ n12dot = 37; /* number of missing for smk0=abstinent */ p122 = (orat*(n112/n111))/(1 + (orat*(n112/n111))); n122 = p122*n12dot; n121 = (1 - p122)*n12dot; n22dot = 80; /* number of missing for smk0=smoking */ p222 = (orat*(n212/n211))/(1 + (orat*(n212/n211))); n222 = p222*n22dot; n221 = (1 - p222)*n22dot; /* variance associated with betas */ beta0v = (n111 + n112)/(n111*n112); beta1v = 1/n111 + 1/n112 + 1/n121 + 1/n122; beta2v = (n211 + n212)/(n211*n212); beta3v = 1/n211 + 1/n212 + 1/n221 + 1/n222; /* 100 simulated datasets */ DATA sim; SET one; ARRAY smks(100) smks1-smks100; DO i = 1 TO 100; IF miss EQ 0 THEN smks(i) = smk; IF miss EQ 1 THEN DO; exp1 = RANEXP(seed); exp2 = RANEXP(seed); stdl = LOG(exp1/exp2); ran0 = RANNOR(seed); ran1 = RANNOR(seed); /* the next lines incorporate the covariance between beta0 and beta1 using the Cholesky factorization of the variance-covariance matrix (and likewise for beta2 and beta3) */ beta0 = beta0m + ran0*SQRT(beta0v); beta1 = beta1m - ran0*SQRT(beta0v) + ran1*SQRT(beta1v - beta0v); ran2 = RANNOR(seed); ran3 = RANNOR(seed); beta2 = beta2m + ran2*SQRT(beta2v); beta3 = beta3m - ran2*SQRT(beta2v) + ran3*SQRT(beta3v - beta2v); ystar = (1 - smk0)*(beta0 + beta1*miss) + smk0*(beta2 + beta3*miss) + stdl; smks(i) = 0; IF ystar > 0 THEN smks(i) = 1; END; END; /* set up the data in long format - with imputation number being a variable */ DATA unisim (KEEP = id grp smki _imputation_); SET sim; ARRAY smks(100) smks1-smks100; DO _imputation_ = 1 to 100; smki = smks(_imputation_); OUTPUT; END; PROC SORT; BY _imputation_ ; RUN; PROC LOGISTIC DESCENDING NOPRINT OUTEST=outlreg COVOUT; MODEL smki = grp / LINK = LOGIT; BY _imputation_ ; RUN; PROC MIANALYZE DATA=outlreg; VAR intercept grp; RUN;