/*=============================================================== Macro repeat is used to set several datasets for output =================================================================*/ %macro repeat(var,num); %do i=1 %to # &&var.&i %end; %mend repeat; /*=============================================================== Macro marker is for simulation under marker =================================================================*/ %macro marker( p_Q=0.01, /* allele freq of QTL*/ p_M=0.03, /* allele freq of marker */ D_prime=0.85, /* D' */ k=10, /* set size in ERS */ n=200, /* sample size */ epsilon=1.03, /* alternative */ rep=1000, /* replications*/ unit=2000, /* unit=n times k*/ portion=.10, /* portion of data used to estimate the cutoffs */ num=1 /* the number of simulation, used to indicate the output dataset */); /* screening the portion of nk ERS to estimate cutoff points */ data hap; do rep=1 to &rep; do i=1 to &portion*&n*&k; /* compute D from D' and four tau's */ D=&D_prime*min(&p_Q*(1-&p_M),(1-&p_q)*&p_M); tau1=&p_Q*&p_M+D; tau2=&p_Q*(1-&p_M)-D; tau3=(1-&p_Q)*&p_M-D; tau4=(1-&p_Q)*(1-&p_M)+D; /* each pair of haplotypes is generated independently using the haplotype freq's */ p1=ranuni(0); p2=ranuni(0); if p1=&&cupper&i then group="case"; else group="none"; end; %end; ID=1; TRN=1; keep rep x marker QTL ID group TRN; run; data TS; set TS0; if group="cont" or group="case"; run; /* compute the size of case-control in TRN so far */ proc univariate data=TS noprint; by rep; var ID; output out=total sum=sum; run; /* screening case and controls using cutoff points TRN: case and control ERS: case and cotnrol */ data hapa; set total; by rep; %do i=1 %to &rep; if rep=&i then do; screen=&portion*&n*&k; totsize=sum; do while ((totsize < 2*&n) or (screen<&n*&k)); /* compute D from D' and four tau's */ D=&D_prime*min(&p_Q*(1-&p_M),(1-&p_q)*&p_M); tau1=&p_Q*&p_M+D; tau2=&p_Q*(1-&p_M)-D; tau3=(1-&p_Q)*&p_M-D; tau4=(1-&p_Q)*(1-&p_M)+D; /* each pair of haplotypes is generated independently using the haplotype freq's */ p1=ranuni(0); p2=ranuni(0); if p1 &&cupper&i then do; if totsize < 2*&n then TRN=1; else TRN=0; totsize=totsize+1; screen=screen+1; group="case"; ID=1; output; end; else do; screen=screen+1; TRN=0; ID=1; group="none"; output; end; end; end; %end; keep rep marker QTL x screen group ID TRN; run; /*=== TRN case and controls comparable to ERS ===*/ data TRN; set TS hapa; if TRN=1; keep rep x marker QTL ID group; run; /*=== Get ERS ===*/ data hapb; set hapa; by rep; if screen <= &n*&k; keep rep x marker QTL ID; run; data hapc; set hapb hap; by rep; keep rep x marker QTL ID; run; data ERS; set hapc; do i=1 to &n; if (_n_-(rep-1)*&n*&k)/&k le i and (_n_-(rep-1)*&n*&k)/&k > i-1 then do; sam_size=i; set_size=(_n_-(rep-1)*&n*&k)-(i-1)*&k; end; end; run; /* sort data to get case/control for ERS */ proc sort data=ERS out=gene_xout; by rep sam_size x; run; /* ERS contains n copies of one case and one control for each replicate */ data ERS2; set gene_xout; by rep sam_size; if first.sam_size then do; group="cont"; output; end; if last.sam_size then do; group="case"; output; end; keep rep QTL marker x group ID; run; /*=== ave screening size for TRN ===*/ data scr; set hapa; if TRN=1; keep rep screen; run; proc sort data=scr; by rep screen; run; data scra; set scr; by rep screen; if last.rep; run; proc univariate data=scra noprint; var screen; output out=sout mean=screensize; run; /*=== TRN with random controls; first take the existing cases ===*/ data TScase; set TS; by rep; if group="case"; run; /* compute the size of case in TRN so far */ proc univariate data=TScase noprint; by rep; var ID; output out=total sum=sum; run; /* screening more cases for TRN up to n cases */ data TRNc; set total; %do i=1 %to &rep; if rep=&i then do; totsize=sum; do while (totsize < &n); /* compute D from D' and four tau's */ D=&D_prime*min(&p_Q*(1-&p_M),(1-&p_q)*&p_M); tau1=&p_Q*&p_M+D; tau2=&p_Q*(1-&p_M)-D; tau3=(1-&p_Q)*&p_M-D; tau4=(1-&p_Q)*(1-&p_M)+D; /* each pair of haplotypes is generated independently using the haplotype freq's */ p1=ranuni(0); p2=ranuni(0); if p1 &&cupper&i then do; totsize=totsize+1; group="case"; ID=1; output; end; end; end; %end; keep rep marker QTL x group ID; run; /* combine cases up to n */ data TRNcase; set TScase TRNc; keep rep QTL marker x group ID; run; /*=== get random samples as controls */ data cont; set ERS; if _n_ > (rep-1)*&n*&k and _n_ <= (rep-1)*&n*&k+ &n then group="Rctr"; else group="Rcse"; unit=(sam_size-1)*&k+set_size; /* defined to be randomly drawing */ keep rep unit group QTL marker x; run; proc plan; factors rep=&rep unit=&unit /noprint; output data=cont out=cont2; run; proc sort data=cont2 out=cont3; by rep unit; run; proc sort data=cont; by rep unit; run; data rancont; merge cont(drop=group) cont3(keep=rep unit group); by rep unit; if group="Rctr"; run; data rancc; set TRNcase rancont; run; data TRN2; set rancc; if group="Rctr" then group="cont"; ID=1; drop unit; run; /*=== three datasets are defined: ERS2: cases and controls from ERS; trn: cases and controls from trunc. trn2: controls are random samples. ===*/ /*============================================================ Macro t1 is to compute test statistic t1 ============================================================*/ %macro t1(datain, dataout); proc sort data=&datain out=data1; by rep group marker; run; * find total count of genotypes per rep and group; proc univariate data=data1 noprint; by rep group marker; var ID; output out=out n=count; run; * find total per rep and group; proc univariate data=data1 noprint; by rep group; var ID; output out=out2 n=n; run; data countM; set out; if marker="MM" then freq=2; else if marker="Mm" then freq=1; else freq=0; countM=freq*count; keep rep group countM; run; * find total count of M/Q allele in case and control ; proc univariate data=countM noprint; by rep group; var countM; output out=countMout sum=count; run; data data2; merge countMout out2; by rep group; run; data case cont; set data2; if group="case" then output case; if group="cont" then output cont; run; data &dataout; merge case (rename=(n=ncase count=N_U)) cont(rename=(n=ncont count=N_L)); by rep; phat=(N_U+N_L)/(2*ncase+2*ncont); T1=((N_U-2*ncase*phat)**2/(2*ncase) +(N_L-2*ncont*phat)**2/(2*ncont))/(phat*(1-phat)); if t1> cinv(0.95,1) then pt105=1; else pt105=0; /* alpha=0.05 */ if t1> cinv(0.99,1) then pt101=1; else pt101=0; /* alpha=0.01 */ pvalue=1-probchi(t1,1); if pvalue < exp(-cinv(.99,4)) then lpvt1=-cinv(.991,4); /* log p-value */ else lpvt1=log(pvalue); keep rep t1 lpvt1 pt105 pt101; run; %mend t1; /*=== compute power and log pvalue for t1 ===*/ %t1(ERS2, ERSt1); %t1(TRN, TRNt1); %t1(TRN2, TRN2t1); /*=== Macro t2star calculate power and log pvalue for each sampling plan ===*/ %macro t2star(datain, dataout); /*=== only cases are used for t2 ===*/ data t2star; set &datain; if group="case" and marker ne "MM"; drop QTL group; run; proc sort data=t2star; by rep marker; run; /*=== find x_l bar and n_l ===*/ proc univariate data=t2star noprint; by rep marker; var x; output out=t2starout mean=x_l n=n_l css=s_l; run; /*=== split and merge to calculate difference etc ===*/ data M0 M1; set t2starout (keep=rep marker x_l n_l s_l); if marker="mm" then output M0; if marker="Mm" then output M1; drop marker; run; proc sort data=M0; by rep; run; proc sort data=M1; by rep; run; /*=== find the numerator of t2star ===*/ data &dataout; merge M0 (rename=(n_l=n_0 x_l=x_0 s_l=s_0)) M1 (rename=(n_l=n_1 x_l=x_1 s_l=s_1)); by rep; s2=(s_0+s_1)/(n_0+n_1-2); T2s=(x_0-x_1)**2/((n_0+n_1)/(n_0*n_1)*s2); if t2s>cinv(0.95,1) then pt2s05=1; else pt2s05=0; if t2s>cinv(0.99,1) then pt2s01=1; else pt2s01=0; pvalue=1-probchi(t2s,1); if pvalue < exp(-cinv(.99,4)) then lpvt2s=-cinv(.991,4); /* log p-value */ else lpvt2s=log(pvalue); keep rep pt2s05 pt2s01 lpvt2s; run; /* proc univariate data=&dataout noprint; var pt2s05 pt2s01; output out=t2s mean=pt2s05 pt2s01; run; title3 "t2 star: &datain"; proc print; run; */ %mend t2star; %t2star(ERS2, ERSt2s); %t2star(trn, TRNt2s); %t2star(trn2, trn2t2s); /*=== output datasets with results: ERSt1, ERSt2s and TRNt1, TRNt2s ===*/ data result; merge ERSt1 (rename=(pt105 =pt1ERS05 pt101 =pt1ERS01 lpvt1 =lpvt1ERS)) ERSt2s (rename=(pt2s05=pt2ERS05 pt2s01=pt2ERS01 lpvt2s=lpvt2ERS)) TRNt1 (rename=(pt105 =pt1TRN05 pt101 =pt1TRN01 lpvt1 =lpvt1TRN)) TRNt2s (rename=(pt2s05=pt2TRN05 pt2s01=pt2TRN01 lpvt2s=lpvt2TRN)) TRN2t1 (rename=(pt105 =p2t1TRN05 pt101 =p2t1TRN01 lpvt1 =lp2vt1TRN)) TRN2t2s (rename=(pt2s05=p2t2TRN05 pt2s01=p2t2TRN01 lpvt2s=lp2vt2TRN)); by rep; t3ERS = -2*lpvt1ERS -2*lpvt2ERS; /* ERS */ t3TRN = -2*lpvt1TRN -2*lpvt2TRN; /* Trunc */ t3TRNran = -2*lp2vt1TRN -2*lp2vt2TRN; /* Trunc randon controls */ if t3ERS > cinv(0.95,4) then pt3ERS05=1; else pt3ERS05=0; if t3ERS > cinv(0.99,4) then pt3ERS01=1; else pt3ERS01=0; if t3TRN > cinv(0.95,4) then pt3TRN05=1; else pt3TRN05=0; if t3TRN > cinv(0.99,4) then pt3TRN01=1; else pt3TRN01=0; if t3TRNran> cinv(0.95,4) then p2t3TRN05=1; else p2t3TRN05=0; if t3TRNran> cinv(0.99,4) then p2t3TRN01=1; else p2t3TRN01=0; run; /*=== compare powers ===*/ proc univariate data=result noprint; var pt1ERS05 pt1TRN05 p2t1TRN05 pt2ERS05 pt2TRN05 p2t2TRN05 pt3ERS05 pt3TRN05 p2t3TRN05 pt1ERS01 pt1TRN01 p2t1TRN01 pt2ERS01 pt2TRN01 p2t2TRN01 pt3ERS01 pt3TRN01 p2t3TRN01; output out=output mean = t1ERS05 t1TCC05 t1TC05 t2ERS05 t2TCC05 t2TC05 t3ERS05 t3TCC05 t3TC05 t1ERS01 t1TCC01 t1TC01 t2ERS01 t2TCC01 t2TC01 t3ERS01 t3TCC01 t3TC01; run; data output# set output; power05=compress(t1ERS05||"~"||t1TCC05||"~"||t1TC05||"~"||t2ERS05||"~"||t2TCC05||"~"||t2TC05||"~" ||t3ERS05||"~"||t3TCC05||"~"||t3TC05); power01=compress(t1ERS01||"~"||t1TCC01||"~"||t1TC01||"~"||t2ERS01|| "~"||t2TCC01||"~"||t2TC01||"~"||t3ERS01||"~"||t3TCC01||"~"||t3TC01); keep power05 power01; run; data outputa# merge output&num sout; output05=compress(power05||"~"||screensize); keep output05; run; data outputb# merge output&num sout; output01=compress(power01||"~"||screensize); keep output01; run; %mend marker; /*================================================== simulation marker ====================================================*/ /*k=20*/ %marker(p_M=0.01, D_prime=0.95,epsilon=1.03, portion=.05,k=20, unit=4000, num=1); %marker(p_M=0.01, D_prime=0.95,epsilon=1.03, portion=.10,k=20, unit=4000, num=2); %marker(p_M=0.01, D_prime=0.95,epsilon=1.03, portion=.50,k=20, unit=4000, num=3); %marker(p_M=0.01, D_prime=0.85,epsilon=1.03, portion=.05,k=20, unit=4000, num=4); %marker(p_M=0.01, D_prime=0.85,epsilon=1.03, portion=.10,k=20, unit=4000, num=5); %marker(p_M=0.01, D_prime=0.85,epsilon=1.03, portion=.50,k=20, unit=4000, num=6); %marker(p_M=0.01, D_prime=0.50,epsilon=1.03, portion=.05,k=20, unit=4000, num=7); %marker(p_M=0.01, D_prime=0.50,epsilon=1.03, portion=.10,k=20, unit=4000, num=8); %marker(p_M=0.01, D_prime=0.50,epsilon=1.03, portion=.50,k=20, unit=4000, num=9); %marker(p_M=0.03, D_prime=0.95,epsilon=1.03, portion=.05,k=20, unit=4000, num=10); %marker(p_M=0.03, D_prime=0.95,epsilon=1.03, portion=.10,k=20, unit=4000, num=11); %marker(p_M=0.03, D_prime=0.95,epsilon=1.03, portion=.50,k=20, unit=4000, num=12); %marker(p_M=0.03, D_prime=0.85,epsilon=1.03, portion=.05,k=20, unit=4000, num=13); %marker(p_M=0.03, D_prime=0.85,epsilon=1.03, portion=.10,k=20, unit=4000, num=14); %marker(p_M=0.03, D_prime=0.85,epsilon=1.03, portion=.50,k=20, unit=4000, num=15); %marker(p_M=0.03, D_prime=0.50,epsilon=1.03, portion=.05,k=20, unit=4000, num=16); %marker(p_M=0.03, D_prime=0.50,epsilon=1.03, portion=.10,k=20, unit=4000, num=17); %marker(p_M=0.03, D_prime=0.50,epsilon=1.03, portion=.50,k=20, unit=4000, num=18); %marker(p_M=0.01, D_prime=0.95,epsilon=1.65, portion=.05,k=20, unit=4000, num=19); %marker(p_M=0.01, D_prime=0.95,epsilon=1.65, portion=.10,k=20, unit=4000, num=20); %marker(p_M=0.01, D_prime=0.95,epsilon=1.65, portion=.50,k=20, unit=4000, num=21); %marker(p_M=0.01, D_prime=0.85,epsilon=1.65, portion=.05,k=20, unit=4000, num=22); %marker(p_M=0.01, D_prime=0.85,epsilon=1.65, portion=.10,k=20, unit=4000, num=23); %marker(p_M=0.01, D_prime=0.85,epsilon=1.65, portion=.50,k=20, unit=4000, num=24); %marker(p_M=0.01, D_prime=0.50,epsilon=1.65, portion=.05,k=20, unit=4000, num=25); %marker(p_M=0.01, D_prime=0.50,epsilon=1.65, portion=.10,k=20, unit=4000, num=26); %marker(p_M=0.01, D_prime=0.50,epsilon=1.65, portion=.50,k=20, unit=4000, num=27); %marker(p_M=0.03, D_prime=0.95,epsilon=1.65, portion=.05,k=20, unit=4000, num=28); %marker(p_M=0.03, D_prime=0.95,epsilon=1.65, portion=.10,k=20, unit=4000, num=29); %marker(p_M=0.03, D_prime=0.95,epsilon=1.65, portion=.50,k=20, unit=4000, num=30); %marker(p_M=0.03, D_prime=0.85,epsilon=1.65, portion=.05,k=20, unit=4000, num=31); %marker(p_M=0.03, D_prime=0.85,epsilon=1.65, portion=.10,k=20, unit=4000, num=32); %marker(p_M=0.03, D_prime=0.85,epsilon=1.65, portion=.50,k=20, unit=4000, num=33); %marker(p_M=0.03, D_prime=0.50,epsilon=1.65, portion=.05,k=20, unit=4000, num=34); %marker(p_M=0.03, D_prime=0.50,epsilon=1.65, portion=.10,k=20, unit=4000, num=35); %marker(p_M=0.03, D_prime=0.50,epsilon=1.65, portion=.50,k=20, unit=4000, num=36); /* k=20: marker */ data k20a; set %repeat(outputa,36); run; title "Marker, k=20, alpha=0.05"; proc print data=k20a; run; data k20b; set %repeat(outputb,36); run; title "Marker, k=20, alpha=0.01"; proc print data=k20b; run; /*k=10*/ %marker(p_M=0.01, D_prime=0.95,epsilon=1.03, portion=.05,K=10, unit=2000, num=1); %marker(p_M=0.01, D_prime=0.95,epsilon=1.03, portion=.10,K=10, unit=2000, num=2); %marker(p_M=0.01, D_prime=0.95,epsilon=1.03, portion=.50,K=10, unit=2000, num=3); %marker(p_M=0.01, D_prime=0.85,epsilon=1.03, portion=.05,K=10, unit=2000, num=4); %marker(p_M=0.01, D_prime=0.85,epsilon=1.03, portion=.10,K=10, unit=2000, num=5); %marker(p_M=0.01, D_prime=0.85,epsilon=1.03, portion=.50,K=10, unit=2000, num=6); %marker(p_M=0.01, D_prime=0.50,epsilon=1.03, portion=.05,K=10, unit=2000, num=7); %marker(p_M=0.01, D_prime=0.50,epsilon=1.03, portion=.10,K=10, unit=2000, num=8); %marker(p_M=0.01, D_prime=0.50,epsilon=1.03, portion=.50,K=10, unit=2000, num=9); %marker(p_M=0.03, D_prime=0.95,epsilon=1.03, portion=.05,K=10, unit=2000, num=10); %marker(p_M=0.03, D_prime=0.95,epsilon=1.03, portion=.10,K=10, unit=2000, num=11); %marker(p_M=0.03, D_prime=0.95,epsilon=1.03, portion=.50,K=10, unit=2000, num=12); %marker(p_M=0.03, D_prime=0.85,epsilon=1.03, portion=.05,K=10, unit=2000, num=13); %marker(p_M=0.03, D_prime=0.85,epsilon=1.03, portion=.10,K=10, unit=2000, num=14); %marker(p_M=0.03, D_prime=0.85,epsilon=1.03, portion=.50,K=10, unit=2000, num=15); %marker(p_M=0.03, D_prime=0.50,epsilon=1.03, portion=.05,K=10, unit=2000, num=16); %marker(p_M=0.03, D_prime=0.50,epsilon=1.03, portion=.10,K=10, unit=2000, num=17); %marker(p_M=0.03, D_prime=0.50,epsilon=1.03, portion=.50,K=10, unit=2000, num=18); %marker(p_M=0.01, D_prime=0.95,epsilon=1.65, portion=.05,K=10, unit=2000, num=19); %marker(p_M=0.01, D_prime=0.95,epsilon=1.65, portion=.10,K=10, unit=2000, num=20); %marker(p_M=0.01, D_prime=0.95,epsilon=1.65, portion=.50,K=10, unit=2000, num=21); %marker(p_M=0.01, D_prime=0.85,epsilon=1.65, portion=.05,K=10, unit=2000, num=22); %marker(p_M=0.01, D_prime=0.85,epsilon=1.65, portion=.10,K=10, unit=2000, num=23); %marker(p_M=0.01, D_prime=0.85,epsilon=1.65, portion=.50,K=10, unit=2000, num=24); %marker(p_M=0.01, D_prime=0.50,epsilon=1.65, portion=.05,K=10, unit=2000, num=25); %marker(p_M=0.01, D_prime=0.50,epsilon=1.65, portion=.10,K=10, unit=2000, num=26); %marker(p_M=0.01, D_prime=0.50,epsilon=1.65, portion=.50,K=10, unit=2000, num=27); %marker(p_M=0.03, D_prime=0.95,epsilon=1.65, portion=.05,K=10, unit=2000, num=28); %marker(p_M=0.03, D_prime=0.95,epsilon=1.65, portion=.10,K=10, unit=2000, num=29); %marker(p_M=0.03, D_prime=0.95,epsilon=1.65, portion=.50,K=10, unit=2000, num=30); %marker(p_M=0.03, D_prime=0.85,epsilon=1.65, portion=.05,K=10, unit=2000, num=31); %marker(p_M=0.03, D_prime=0.85,epsilon=1.65, portion=.10,K=10, unit=2000, num=32); %marker(p_M=0.03, D_prime=0.85,epsilon=1.65, portion=.50,K=10, unit=2000, num=33); %marker(p_M=0.03, D_prime=0.50,epsilon=1.65, portion=.05,K=10, unit=2000, num=34); %marker(p_M=0.03, D_prime=0.50,epsilon=1.65, portion=.10,K=10, unit=2000, num=35); %marker(p_M=0.03, D_prime=0.50,epsilon=1.65, portion=.50,K=10, unit=2000, num=36); data k10a; set %repeat(outputa,36); run; title "Marker, k=10, alpha=0.05"; proc print data=k10a; run; data k10b; set %repeat(outputb,36); run; title "Marker, k=10, alpha=0.01"; proc print data=k10b; run; /*=============================================================== Macro trait is for simulation of candidate gene =================================================================*/ %macro trait(p_Q=0.01, k=10, n=200, epsilon=1.03, rep=1000, unit=2000, portion=.05, num=1); /*=================================== data setups =====================================*/ /* screening the portion of nk ERS to estimate cutoff points */ data hap; do rep=1 to &rep; do i=1 to &portion*&n*&k; p1=ranuni(0); p2=ranuni(0); if p1<&p_Q then allele1="Q"; else allele1="q"; if p2<&p_Q then allele2="Q"; else allele2="q"; if allele1="Q" and allele2="Q" then do; QTL="QQ"; x=rannor(0)+2*ε end; else if allele1="Q" and allele2="q" then do; QTL="Qq"; x=rannor(0)+ε end; else if allele1="q" and allele2="Q" then do; QTL="Qq"; x=rannor(0)+ε end; else if allele1="q" and allele2="q" then do; QTL="qq"; x=rannor(0); end; ID=1; output; end; end; run; /* find cutoffs */ proc sort data=hap out=out; by rep x; run; data lower (keep=rep lower) upper (keep=rep upper); set out; by rep x; if _n_=(rep-1)*&portion*&n*&k+&portion*&n then do; lower=x; output lower; end; if _n_=(rep-1/&k)*&portion*&n*&k then do; upper=x; output upper; end; run; data out; merge lower upper; by rep; %do i=1 %to &rep; if rep=&i then do; call symput("clower&i", lower); call symput("cupper&i", upper); end; %end; run; /* TRN from the screening samples used to estimate cutoffs */ data TS0; set hap; by rep; %do i=1 %to &rep; if rep=&i then do; if x<=&&clower&i then group="cont"; else if x>=&&cupper&i then group="case"; else group="none"; end; %end; ID=1; TRN=1; keep rep x QTL ID group TRN; run; data TS; set TS0; if group="cont" or group="case"; run; /* compute the size of case-control in TRN so far */ proc univariate data=TS noprint; by rep; var ID; output out=total sum=sum; run; /* screening case and controls using cutoff points TRN: case and control ERS: case and cotnrol */ data hapa; set total; by rep; %do i=1 %to &rep; if rep=&i then do; screen=&portion*&n*&k; totsize=sum; do while ((totsize < 2*&n) or (screen<&n*&k)); p1=ranuni(0); p2=ranuni(0); if p1<&p_Q then allele1="Q"; else allele1="q"; if p2<&p_Q then allele2="Q"; else allele2="q"; if allele1="Q" and allele2="Q" then do; QTL="QQ"; x=rannor(0)+2*ε end; else if allele1="Q" and allele2="q" then do; QTL="Qq"; x=rannor(0)+ε end; else if allele1="q" and allele2="Q" then do; QTL="Qq"; x=rannor(0)+ε end; else if allele1="q" and allele2="q" then do; QTL="qq"; x=rannor(0); end; if x < &&clower&i then do; if totsize < 2*&n then TRN=1; else TRN=0; totsize=totsize+1; screen=screen+1; group="cont"; ID=1; output; end; else if x > &&cupper&i then do; if totsize < 2*&n then TRN=1; else TRN=0; totsize=totsize+1; screen=screen+1; group="case"; ID=1; output; end; else do; screen=screen+1; TRN=0; ID=1; group="none"; output; end; end; end; %end; keep rep QTL x screen group ID TRN; run; /*=== TRN case and controls comparable to ERS ===*/ data TRN; set TS hapa; if TRN=1; keep rep x QTL ID group; run; /*=== Get ERS ===*/ data hapb; set hapa; by rep; if screen <= &n*&k; keep rep x QTL ID; run; data hapc; set hapb hap; keep rep x QTL ID; run; proc sort data=hapc; by rep; run; /* define sample size and set size for ERS */ data ERS; set hapc; do i=1 to &n; if (_n_-(rep-1)*&n*&k)/&k le i and (_n_-(rep-1)*&n*&k)/&k > i-1 then do; sam_size=i; set_size=(_n_-(rep-1)*&n*&k)-(i-1)*&k; end; end; run; /* sort data to get case/control for ERS */ proc sort data=ERS out=gene_xout; by rep sam_size x; run; /* ERS contains n copies of one case and one control for each replicate */ data ERS2; set gene_xout; by rep sam_size; if first.sam_size then do; group="cont"; output; end; if last.sam_size then do; group="case"; output; end; keep rep QTL x group ID; run; /*=== ave screening size for TRN ===*/ data scr; set hapa; if TRN=1; keep rep screen; run; proc sort data=scr; by rep screen; run; data scra; set scr; by rep screen; if last.rep; run; proc univariate data=scra noprint; var screen; output out=sout mean=screensize; run; /*=== TRN with random controls; first take the existing cases ===*/ data TScase; set TS; by rep; if group="case"; run; /* compute the size of case in TRN so far */ proc univariate data=TScase noprint; by rep; var ID; output out=total sum=sum; run; /* screening more cases for TRN up to n cases */ data TRNc; set total; %do i=1 %to &rep; if rep=&i then do; totsize=sum; do while (totsize < &n); p1=ranuni(0); p2=ranuni(0); if p1<&p_Q then allele1="Q"; else allele1="q"; if p2<&p_Q then allele2="Q"; else allele2="q"; if allele1="Q" and allele2="Q" then do; QTL="QQ"; x=rannor(0)+2*ε end; else if allele1="Q" and allele2="q" then do; QTL="Qq"; x=rannor(0)+ε end; else if allele1="q" and allele2="Q" then do; QTL="Qq"; x=rannor(0)+ε end; else if allele1="q" and allele2="q" then do; QTL="qq"; x=rannor(0); end; if x > &&cupper&i then do; totsize=totsize+1; group="case"; ID=1; output; end; end; end; %end; keep rep QTL x group ID; run; /* combine cases up to n */ data TRNcase; set TScase TRNc; keep rep QTL x group ID; run; /*=== get random samples as controls */ data cont; set ERS; if _n_ > (rep-1)*&n*&k and _n_ <= (rep-1)*&n*&k+ &n then group="Rctr"; else group="Rcse"; unit=(sam_size-1)*&k+set_size; /* defined to be randomly drawing */ keep rep unit group QTL x; run; proc plan; factors rep=&rep unit=&unit /noprint; output data=cont out=cont2; run; proc sort data=cont2 out=cont3; by rep unit; run; proc sort data=cont; by rep unit; run; data rancont; merge cont(drop=group) cont3(keep=rep unit group); by rep unit; if group="Rctr"; run; data rancc; set TRNcase rancont; run; data TRN2; set rancc; if group="Rctr" then group="cont"; ID=1; drop unit; run; /*=== three datasets are defined: ERS2: cases and controls from ERS; trn: cases and controls from trunc. trn2: controls are random samples. ===*/ /*============================================================ compute test statistic t1 ============================================================*/ /*=== macro t1 calculate power and log pvalue for each sampling plan ===*/ %macro t1(datain, dataout); proc sort data=&datain out=data1; by rep group QTL; run; * find total count of genotypes per rep and group; proc univariate data=data1 noprint; by rep group QTL; var ID; output out=out n=count; run; * find total per rep and group; proc univariate data=data1 noprint; by rep group; var ID; output out=out2 n=n; run; data countQ; set out; if QTL="QQ" then freq=2; else if QTL="Qq" then freq=1; else freq=0; countQ=freq*count; keep rep group countQ; run; * find total count of Q allele in case and control ; proc univariate data=countQ noprint; by rep group; var countQ; output out=countQout sum=count; run; data data2; merge countQout out2; by rep group; run; data case cont; set data2; if group="case" then output case; if group="cont" then output cont; run; data &dataout; merge case (rename=(n=ncase count=N_U)) cont(rename=(n=ncont count=N_L)); by rep; phat=(N_U+N_L)/(2*ncase+2*ncont); T1=((N_U-2*ncase*phat)**2/(2*ncase) +(N_L-2*ncont*phat)**2/(2*ncont))/(phat*(1-phat)); *T1=(N_U/(2*ncase)-N_L/(2*ncont))**2/((N_U+N_L)*(2*(ncase+ncont)-N_U-N_L)/(8*ncase*ncont*(ncase+ncont))); if t1> cinv(0.95,1) then pt105=1; else pt105=0; /* alpha=0.05 */ if t1> cinv(0.99,1) then pt101=1; else pt101=0; /* alpha=0.01 */ pvalue=1-probchi(t1,1); if pvalue < exp(-cinv(.99,4)) then lpvt1=-cinv(.991,4); /* log p-value */ else lpvt1=log(pvalue); keep rep t1 lpvt1 pt105 pt101; run; %mend t1; /*=== compute power and log pvalue for t1 ===*/ %t1(ERS2, ERSt1); %t1(trn, TRNt1); %t1(trn2, TRN2t1); /*============================================================= Compute test statistics t2 and t3 =============================================================*/ /*=== Macro t2star calculate power and log pvalue for each sampling plan ===*/ %macro t2star(datain, dataout); /*=== only cases are used for t2 ===*/ data t2star; set &datain; if group="case" and QTL ne "QQ"; drop group; run; proc sort data=t2star; by rep QTL; run; /*=== find x_l bar and n_l ===*/ proc univariate data=t2star noprint; by rep QTL; var x; output out=t2starout mean=x_l n=n_l css=s_l; run; /*=== split and merge to calculate difference etc ===*/ data Q0 Q1; set t2starout (keep=rep QTL x_l n_l s_l); if QTL="qq" then output Q0; if QTL="Qq" then output Q1; drop QTL; run; proc sort data=Q0; by rep; run; proc sort data=Q1; by rep; run; /*=== find the numerator of t2star ===*/ data &dataout; merge Q0 (rename=(n_l=n_0 x_l=x_0 s_l=s_0)) Q1 (rename=(n_l=n_1 x_l=x_1 s_l=s_1)); by rep; s2=(s_0+s_1)/(n_0+n_1-2); T2s=(x_0-x_1)**2/((n_0+n_1)/(n_0*n_1)*s2); if t2s>cinv(0.95,1) then pt2s05=1; else pt2s05=0; if t2s>cinv(0.99,1) then pt2s01=1; else pt2s01=0; pvalue=1-probchi(t2s,1); if pvalue < exp(-cinv(.99,4)) then lpvt2s=-cinv(.991,4); /* log p-value */ else lpvt2s=log(pvalue); keep rep pt2s05 pt2s01 lpvt2s; run; %mend t2star; %t2star(ERS2, ERSt2s); %t2star(trn, TRNt2s); %t2star(trn2, trn2t2s); /*=== output datasets with results: ERSt1, ERSt2s and TRNt1, TRNt2s ===*/ data result; merge ERSt1 (rename=(pt105 =pt1ERS05 pt101 =pt1ERS01 lpvt1 =lpvt1ERS)) ERSt2s (rename=(pt2s05=pt2ERS05 pt2s01=pt2ERS01 lpvt2s=lpvt2ERS)) TRNt1 (rename=(pt105 =pt1TRN05 pt101 =pt1TRN01 lpvt1 =lpvt1TRN)) TRNt2s (rename=(pt2s05=pt2TRN05 pt2s01=pt2TRN01 lpvt2s=lpvt2TRN)) TRN2t1 (rename=(pt105 =p2t1TRN05 pt101 =p2t1TRN01 lpvt1 =lp2vt1TRN)) TRN2t2s (rename=(pt2s05=p2t2TRN05 pt2s01=p2t2TRN01 lpvt2s=lp2vt2TRN)); by rep; t3ERS = -2*lpvt1ERS -2*lpvt2ERS; /* ERS */ t3TRN = -2*lpvt1TRN -2*lpvt2TRN; /* Trunc */ t3TRNran = -2*lp2vt1TRN -2*lp2vt2TRN; /* Trunc randon controls */ if t3ERS > cinv(0.95,4) then pt3ERS05=1; else pt3ERS05=0; if t3ERS > cinv(0.99,4) then pt3ERS01=1; else pt3ERS01=0; if t3TRN > cinv(0.95,4) then pt3TRN05=1; else pt3TRN05=0; if t3TRN > cinv(0.99,4) then pt3TRN01=1; else pt3TRN01=0; if t3TRNran> cinv(0.95,4) then p2t3TRN05=1; else p2t3TRN05=0; if t3TRNran> cinv(0.99,4) then p2t3TRN01=1; else p2t3TRN01=0; run; /*=== compare powers ===*/ proc univariate data=result noprint; var pt1ERS05 pt1TRN05 p2t1TRN05 pt2ERS05 pt2TRN05 p2t2TRN05 pt3ERS05 pt3TRN05 p2t3TRN05 pt1ERS01 pt1TRN01 p2t1TRN01 pt2ERS01 pt2TRN01 p2t2TRN01 pt3ERS01 pt3TRN01 p2t3TRN01; output out=output mean = t1ERS05 t1TCC05 t1TC05 t2ERS05 t2TCC05 t2TC05 t3ERS05 t3TCC05 t3TC05 t1ERS01 t1TCC01 t1TC01 t2ERS01 t2TCC01 t2TC01 t3ERS01 t3TCC01 t3TC01; run; data output# set output; power05=compress(t1ERS05||"~"||t1TCC05||"~"||t1TC05||"~"||t2ERS05||"~"||t2TCC05||"~"||t2TC05||"~" ||t3ERS05||"~"||t3TCC05||"~"||t3TC05); power01=compress(t1ERS01||"~"||t1TCC01||"~"||t1TC01||"~"||t2ERS01|| "~"||t2TCC01||"~"||t2TC01||"~"||t3ERS01||"~"||t3TCC01||"~"||t3TC01); keep power05 power01; run; data outputa# merge output&num sout; output05=compress(power05||"~"||screensize); keep output05; run; data outputb# merge output&num sout; output01=compress(power01||"~"||screensize); keep output01; run; %mend trait; /*================================================== simulation ====================================================*/ /*k=10*/ %trait(epsilon=.75,portion=.05, k=10, unit=2000, num=1); %trait(epsilon=.75,portion=.10, k=10, unit=2000, num=2); %trait(epsilon=.75,portion=.50, k=10, unit=2000, num=3); %trait(epsilon=.90,portion=.05, k=10, unit=2000, num=4); %trait(epsilon=.90,portion=.10, k=10, unit=2000, num=5); %trait(epsilon=.90,portion=.50, k=10, unit=2000, num=6); %trait(epsilon=1.03,portion=.05, k=10, unit=2000, num=7); %trait(epsilon=1.03,portion=.10, k=10, unit=2000, num=8); %trait(epsilon=1.03,portion=.50, k=10, unit=2000, num=9); %trait(epsilon=1.65,portion=.05, k=10, unit=2000, num=10); %trait(epsilon=1.65,portion=.10, k=10, unit=2000, num=11); %trait(epsilon=1.65,portion=.50, k=10, unit=2000, num=12); data k10a; set %repeat(outputa,12); run; title "Trait, k=10, alpha=0.05"; proc print data=k10a; run; data k10b; set %repeat(outputb,12); run; title "Trait, k=10, alpha=0.01"; proc print data=k10b; run; /*k=20*/ %trait(epsilon=.75,portion=.05, k=20, unit=4000, num=1); %trait(epsilon=.75,portion=.10, k=20, unit=4000, num=2); %trait(epsilon=.75,portion=.50, k=20, unit=4000, num=3); %trait(epsilon=.90,portion=.05, k=20, unit=4000, num=4); %trait(epsilon=.90,portion=.10, k=20, unit=4000, num=5); %trait(epsilon=.90,portion=.50, k=20, unit=4000, num=6); %trait(epsilon=1.03,portion=.05, k=20, unit=4000, num=7); %trait(epsilon=1.03,portion=.10, k=20, unit=4000, num=8); %trait(epsilon=1.03,portion=.50, k=20, unit=4000, num=9); %trait(epsilon=1.65,portion=.05, k=20, unit=4000, num=10); %trait(epsilon=1.65,portion=.10, k=20, unit=4000, num=11); %trait(epsilon=1.65,portion=.50, k=20, unit=4000, num=12); data k20a; set %repeat(outputa,12); run; title "Trait, k=20, alpha=0.05"; proc print data=k20a; run; data k20b; set %repeat(outputb,12); run; title "Trait, k=20, alpha=0.01"; proc print data=k20b; run;