################################################################### # Function: sampsize.oneway # Purpose: Determine sample size needed to detect delta of differentially expressed features # while controlling the FDR at the tau level. # Arguments: # tau - desired level of FDR control # delta - desired average power # theta - theta vector (see Pounds and Cheng 2005), use bgtheta.oneway (below) to determine from background data # ngrps - number of experimental groups in the oneway layout # BH - use Benjamini and Hochberg (1995) procedure? (default = T) # n0 - starting per-group sample size for (default = 2) # nmax - maximum per-group sample size considered feasible (default = 50) # # Returns: a list with the components listed below # n - sample size estimate # alpha - anticipated alpha # aFDR - anticipated FDR # apow - anticipated average power # tau - desired FDR power specified # delta - desired average power # OK - indicates (T/F) whether sample size meets criterion #################################################################### sampsize.oneway<-function(tau,delta,theta,ngrps=2,BH=F,n0=2,nmax=50) { aFDR<-1 n<-n0-1 while((aFDR>tau)&&(n=delta) return(list(n=n,alpha=alpha,aFDR=aFDR,apow=apow,tau=tau,delta=delta,OK=OK)) } ################################################################ # Function: bgtheta.oneway # Purpose: Use F-statistics computed from background data to determine theta for sampsize.oneway # Arguments: Fstats - vector of F-statistics from background data # df1 - numerator degrees of freedom for Fstats # df2 - denominator degrees of freedom for Fstats # fdr.adj - indicates whether to adjust effect size estimates for FDR (default = T) # Returns: a theta vector to use in sampsize.oneway ################################################################# bgtheta.oneway<-function(Fstats,df1,df2,fdr.adj=T) { m<-length(Fstats) th0<-(Fstats>df2/(df2-2))*df1*((df2-2)*Fstats/df2-1)/(2*(df1+df2+1)/(df1+1)) p<-1-pf(Fstats,df1,df2) fdrres<-splosh(p) k<-floor(m*(1-fdrres$pi)) th0[rank(p)>k]<-0 if (!fdr.adj) return(list(theta=th0)) th1<-(1-fdrres$q)*th0 return(list(theta=th0,fdr.theta=th1,fdrres=fdrres)) } ############################################################# # Function: aFDR.oneway # Purpose: Compute the anticipated false discovery ratio # Arguments: alpha - the fixed p-value threshold used to determine significance # theta - vector of ratios of sum of squared effect sizes to twice the variance # ngrps - number of groups compared in the experiment # n - per-group sample size n # BH95 - boolean that indicates whether Benjamini & Hochberg (1995) will be used in final analysis (default = F) # Returns: the aFDR given the supplied parameters ########################################################################## aFDR.oneway<-function(alpha,theta,ngrps,n,BH95=F) { lam<-n*theta df1<-ngrps-1 df2<-ngrps*(n-1) pi.hat<-mean(exp(-lam)) if (!BH95) return(pi.hat*alpha/(1-mean(pf(qf(1-alpha,df1,df2),df1,df2,lam)))) else return(alpha/(1-mean(pf(qf(1-alpha,df1,df2),df1,df2,lam)))) } #################################################### # Function: inverse.avepow.oneway # Purpose: Determine alpha such that average power equals delta # Arguments: delta - desired average power # theta - vector of ratios of sum of squared effect sizes to twice the variance # ngrps - number of groups in the comparison # n - per-group sample size # nbisect - number of bisections to perform (default = 50) # Returns: the smallest alpha such that the average power exceeds or equals delta ######################################################## inverse.avepow.oneway<-function(delta,theta,ngrps,n,nbisect=50) { upp<-1 low<-0 avepow.upp<-1 avepow.low<-0 for (i in 1:nbisect) { mid<-(low+upp)/2 avepow.mid<-avepow.oneway(mid,theta,ngrps,n) upp<-mid*(avepow.mid>delta)+upp*(avepow.mid<=delta) low<-mid*(avepow.mid=delta) } return(upp) } #################################################################### # Function: avepow.oneway # Purpose: Compute the average power for a set of one-way ANOVAs # Arguments: alpha - the fixed p-value threshold used to determine significance # theta - vector of ratios of sum of squared effect sizes to twice the variance # ngrps - number of groups in the comparison # n - per-group sample size # Returns: the average power given the specified parameters ############################################################################## avepow.oneway<-function(alpha,theta,ngrps,n) { if (!any(theta>0)) return(0) nz.theta<-theta[theta!=0] lam<-n*nz.theta df1<-ngrps-1 df2<-ngrps*(n-1) return((1-mean(pf(qf(1-alpha,df1,df2),df1,df2,lam)))) } ####################################################################################################### # Function: splosh # Purpose: Implement the spacings loess histogram method (Pounds and Cheng 2004, Bioinformatics) # Arguments: pvals - a vector of p-values obtained by applying a statistical hypothesis # test to the expression values for each gene individually # LC - (optional) a list containing tuning parameters for the loess algorithm, # see help(loess.control) for more details, defaults to S-plus defaults # dplaces - (optional) round p-values to this many decimal places to avoid # numerical difficulties that can arise by division by small numbers, default = 6 # notes - (optional) may pass any string describing notes regarding the analysis, default = NULL # # Outputs: a list with the following elements # fdr - a vector of the cfdr estimates at the values specified by the user in pvals # q - a vector of corresponding monotone fdr estimates, similar to Storey's q-value # cdf - vector of the SPLOSH cdf estimate evaluated at pvals # fp - vector of estimated no. of false positives incurred by setting threshold # at corresponding value of pvals # fn - vector of estimated no. of false negatives incurred by setting threshold # at corresponding value of pvals # toterr - vector giving the sum of fp and fn for each entry # ebp - the empirical Bayes posterior that the null hypothesis is true, see Efron et al. (JASA 2001) # pi - estimate of the mixing parameter giving the probability of the null hypothesis # ord - a vector giving index numbers to order results according to ascending p-values # dplaces - echoes value of dplaces used in algorithm # LC - echoes value of LC used in algorithm # fdr.method - gives bibliographic reference for the method # notes - returns notes regarding the analysis ########################################################################################################### splosh<-function(pvals,LC=loess.control(),dplaces=3,notes=NULL) { notes<-c(notes,"SPLOSH used to estimate FDR.") x<-round(pvals,dplaces) # Prevent numerical difficulties n<-length(pvals) # Obtain no. of points r<-rank(x) # rank observations ux<-sort(unique(x)) # ordered unique p-values ur<-(sort(unique(r)))-.5 # ordered unique ranks #plot(ux,ur) if (max(ux)<1) # edge adjustments { ux<-c(ux,1) ur<-c(ur,n) } if (min(ux)>0) { ux<-c(0,ux) ur<-c(0,ur) } nux<-length(ux) # No. of unique points ur<-ur/n # Divide ranks by n dx<-diff(ux) # spacings of unique p-values dr<-diff(ur) # spacings of unique ranks dFdx<-exp(log(dr)-log(dx)) # spacing-interval slopes mr<-(ur[1:(nux-1)]+ur[2:nux])/(2) # Unitized Medians of Unique Rank Intervals mx<-(ux[1:(nux-1)]+ux[2:nux])/2 # Unitized Medians of Unique p-value Intervals tr<-asin(2*(mr-.5)) # Arc-Sine Transform Rank Midpoints fit<-loess(log(dFdx)~tr,loess.control=LC) # Lowess of log-transformed EDQF and transformed rank-midpoints tr2<-asin(2*(ur-.5)) # Arc-Sine Transform Unique yhat<-(predict.loess(fit,tr2)) # Obtain Estimated Derivative of CDF (up to constant multiplier) yhat[c(1,nux)]<-approx(tr2[2:(nux-1)], yhat[2:(nux-1)], xout=tr2[c(1,nux)], rule=3)$y # Extrapolate to get endpoint estimates pdf1<-exp(yhat) # Obtain PDF up to a constant trap<-0.5*(pdf1[1:(nux-1)]+pdf1[2:nux])*dx # Trapezoid rule terms const<-sum(trap) # Constant via trapezoid rule pdf2<-pdf1/const # Adjust pdf by constant pdf<-approx(ux,pdf2,xout=pvals,rule=3)$y # Obtain pdf estimate for p-values cdf<-approx(ux,c(0,cumsum(trap)), xout=pvals,rule=2)$y/const # Obtain cdf pi<-min(pdf) # Take pi as min of PDF p.pi<-max(pvals[pdf==pi]) # largest p-value at which pi occurs cdf.pi<-max(cdf[pvals==p.pi]) # CDF evaluated at p.pi cfdr<-pi*pvals/cdf # Estimate cFDR for p-values ebp<-pi/pdf # Estimate EBP of null for p-values cfdr[pvals==0]<-ebp[pvals==0] # L'Hospital's Rule cfdr[cfdr>1]<-1 # Set max(fdr)=1 qval<-cfdr # Initialize q-value vector ordering<-order(pvals) # Report Results qval[rev(ordering)]<-cummin(qval[rev(ordering)]) # Compute q-value fp<-n*cfdr*cdf # Compute Estimated No. False Positives fn<-n*(cdf.pi-cdf-pi*(p.pi-pvals))*(pvals<=p.pi) # Compute Estimated No. False Negatives toterr<-fp+fn # Compute Estimated Total No. Errors return(list(fdr=matrix(cfdr,n,1), # cfdr estimate q=matrix(qval,n,1), # q-value estimate cdf=matrix(cdf,n,1), # cdf estimate fp=matrix(fp,n,1), # estimate of no. of false postives fn=matrix(fn,n,1), # estimate of no. of false negatives toterr=matrix(toterr,n,1), # estimated total no. of errors pdf=matrix(pdf,n,1), # estimate of pdf ebp=matrix(ebp,n,1), # estimate of ebp pi=pi, # estimate of null proportion p.pi=p.pi, # p-value at which estimated pdf achieves minimum cdf.pi=cdf.pi, # cdf evaluated at p.pi ord=ordering, # ordering vector dplaces=dplaces, # rounded to this no. of decimal places LC=LC, # loess.control options used fdr.method="Pounds and Cheng (2004)", # Bibliographic Information notes=notes)) # Notes regarding analysis }