########################################PART1########################################### ## This part is designed to caculate the correlation of p-values is Wang's paper ##Tri distribution########################## tri<-function(n,p1,p2){ a<-runif(n) b1<-length(a[a=p1&a=p1+p2]) return(c(b1,b2,b3)) } gd<-function(r,s,p,f0,f1,f2){ pg0<-(1-p)^2 pg1<-2*p*(1-p) pg2<-p^2 k<-pg0*f0+pg1*f1+pg2*f2 pg0ca<-pg0*f0/k pg1ca<-pg1*f1/k pg2ca<-pg2*f2/k pg0co<-pg0*(1-f0)/(1-k) pg1co<-pg1*(1-f1)/(1-k) pg2co<-pg2*(1-f2)/(1-k) ca<-rep(0,3) co<-rep(0,3) ca<-tri(r,pg0ca,pg1ca) co<-tri(s,pg0co,pg1co) result<-matrix(c(ca[1],co[1],ca[2],co[2],ca[3],co[3]),nrow=2,ncol=3) return(result) } # This code implements an exact SNP test of Hardy-Weinberg Equilibrium as described in # Wigginton, JE, Cutler, DJ, and Abecasis, GR (2005) A Note on Exact Tests of # Hardy-Weinberg Equilibrium. American Journal of Human Genetics. 76: 000 - 000 # NOTE: return code of -1.0 signals an error condition SNPHWE <- function(obs_hets, obs_hom1, obs_hom2) { if (obs_hom1 < 0 || obs_hom2 < 0 || obs_hets < 0) return(-1.0) # total number of genotypes N <- obs_hom1 + obs_hom2 + obs_hets # rare homozygotes, common homozygotes obs_homr <- min(obs_hom1, obs_hom2) obs_homc <- max(obs_hom1, obs_hom2) # number of rare allele copies rare <- obs_homr * 2 + obs_hets # Initialize probability array probs <- rep(0, 1 + rare) # Find midpoint of the distribution mid <- floor(rare * ( 2 * N - rare) / (2 * N)) if ( (mid %% 2) != (rare %% 2) ) mid <- mid + 1 probs[mid + 1] <- 1.0 mysum <- 1.0 # Calculate probablities from midpoint down curr_hets <- mid curr_homr <- (rare - mid) / 2 curr_homc <- N - curr_hets - curr_homr while ( curr_hets >= 2) { probs[curr_hets - 1] <- probs[curr_hets + 1] * curr_hets * (curr_hets - 1.0) / (4.0 * (curr_homr + 1.0) * (curr_homc + 1.0)) mysum <- mysum + probs[curr_hets - 1] # 2 fewer heterozygotes -> add 1 rare homozygote, 1 common homozygote curr_hets <- curr_hets - 2 curr_homr <- curr_homr + 1 curr_homc <- curr_homc + 1 } # Calculate probabilities from midpoint up curr_hets <- mid curr_homr <- (rare - mid) / 2 curr_homc <- N - curr_hets - curr_homr while ( curr_hets <= rare - 2) { probs[curr_hets + 3] <- probs[curr_hets + 1] * 4.0 * curr_homr * curr_homc / ((curr_hets + 2.0) * (curr_hets + 1.0)) mysum <- mysum + probs[curr_hets + 3] # add 2 heterozygotes -> subtract 1 rare homozygtote, 1 common homozygote curr_hets <- curr_hets + 2 curr_homr <- curr_homr - 1 curr_homc <- curr_homc - 1 } # P-value calculation target <- probs[obs_hets + 1] #plo <- min(1.0, sum(probs[1:obs_hets + 1]) / mysum) #phi <- min(1.0, sum(probs[obs_hets + 1: rare + 1]) / mysum) # This assignment is the last statement in the fuction to ensure # that it is used as the return value p <- min(1.0, sum(probs[probs <= target])/ mysum) } ## the LRT test LRT<-function(ca,co,score){ a<-glm(ca/(ca+co)~score, weights=ca+co, family=binomial(link=logit)) b<-glm(ca/(ca+co)~1, weights=ca+co, family=binomial(link=logit)) P_value<-1-pchisq(b$dev-a$dev,df=1) return(P_value) } r<-500 ## r is the number of case s<-500 ## s is the number of control p<-0.5 ## p is the allele frequency of minor allele f0<-0.1192 ##f2<-0.1978 f2<-0.2689 ##f2<-0.1192 ##f1<-f0 ##f1<-(f2+f0)/2 ##f1<-sqrt(f2*f0) f1<-f2 ## f0,f1,f2 is the penetrance ##score<-c(0,0,1) ##score<-c(0,0.5,1) score<-c(0,1,1) ## generate p1,p2,pmin,pmax under null hypothesis m<-10000 p1<-NULL p2<-NULL pmin<-NULL pmax<-NULL for(i in 1:m){ ma<-gd(r,s,p,f0,f0,f0) ca<-ma[1,] co<-ma[2,] p1[i]<-LRT(ca,co,score) p2[i]<-SNPHWE(ca[2],ca[1],ca[3]) pmin[i]<-min(p1[i],p2[i]) pmax[i]<-max(p1[i],p2[i]) } corr<-cov(p1,p2)/sqrt(var(p1)*var(p2)) #####################################PART2###################################################### ## This part is designed to calculate the critical value rafa<-0.01 ## rafa is the nominal type I error TS1P<-NULL TS2P<-NULL STS1P<-NULL STS2P<-NULL pmine<-NULL pmaxe<-NULL TS1E<-NULL TS2E<-NULL STS1E<-NULL STS2E<-NULL TS1P<-0.5*(1-pmin*3+1-pmax*1.5) TS2P<-0.5*(1-pmin*sqrt(2)/(sqrt(2)-1)+1-pmax*sqrt(2)) STS1P<-sort(TS1P) STS2P<-sort(TS2P) CV1P<-STS1P[(1-rafa)*m] CV2P<-STS2P[(1-rafa)*m] p1e<-runif(m) p2e<-runif(m) for(i in 1:m){ pmine[i]<-min(p1e[i],p2e[i]) pmaxe[i]<-max(p1e[i],p2e[i]) } TS1E<-0.5*(1-pmine*3+1-pmaxe*1.5) TS2E<-0.5*(1-pmine*sqrt(2)/(sqrt(2)-1)+1-pmaxe*sqrt(2)) STS1E<-sort(TS1E) STS2E<-sort(TS2E) CV1E<-STS1E[(1-rafa)*m] CV2E<-STS2E[(1-rafa)*m] ########################PART3##################################################### ## This part is designed to calculate power or Type I error p1<-NULL p2<-NULL pmin<-NULL pmax<-NULL for(i in 1:m){ ma<-gd(r,s,p,f0,f1,f2) ca<-ma[1,] co<-ma[2,] p1[i]<-LRT(ca,co,score) p2[i]<-SNPHWE(ca[2],ca[1],ca[3]) pmin[i]<-min(p1[i],p2[i]) pmax[i]<-max(p1[i],p2[i]) } TS1A<-NULL TS2A<-NULL TS1A<-0.5*(1-pmin*3+1-pmax*1.5) TS2A<-0.5*(1-pmin*sqrt(2)/(sqrt(2)-1)+1-pmax*sqrt(2)) P1P<-mean(TS1A>CV1P) ## P1P is the power of TS1 based on the permutation critical value P1E<-mean(TS1A>CV1E) ## P1E is the power of TS1 based on the exact critical value P2P<-mean(TS2A>CV2P) ## P2P is the power of TS2 based on the permutation critical value P2E<-mean(TS2A>CV2E) ## P2E is the power of TS2 based on the exact critical value PLRT<-mean(p1