## This is the working gwas script on Nov 14, 2016 # This is a function to create orthogonal contrasts for latent growth models orthog <- function(waves){ int <- rep(1, waves) lin <- (1:waves)-(mean(1:waves)) qua <- ((1:waves)-(mean(1:waves)))^2 - mean(((1:waves)-(mean(1:waves)))^2) lam <- cbind(int, lin, qua) out <- t(lam) %*% lam lambda <- matrix(NA, waves, 3) for(i in 1:3){lambda[,i] <- lam[,i]/ sqrt(out[i,i])} lambda } # This function chops a larger file (of SNPs) into a more managable file that can be read into R easily. snpData <- function(SNPdata, start = 1, end = 1000){ DATA <- as.data.frame(system(paste("cut -d ' ' -f", paste(start, "-", end, sep=""), SNPdata, "| tail -n +2"), intern = T)) colnames(DATA) <- "Xs" snp <- data.frame(do.call('rbind', strsplit(as.character(DATA$Xs),' ',fixed=TRUE))) for(s in 1:length(snp)){ snp[,s] <- as.numeric(snp[,s])-1 } sysNames <- paste("cut -d ' ' -f", paste(start, "-", end, inc=""), SNPdata, " | head -1") VARS <- paste(system(sysNames, intern = T), collapse = "") VARS <- gsub("[[:punct:]]", "", VARS) VARNAMES <- strsplit(VARS, ' ', fixed = T)[[1]] colnames(snp) <- VARNAMES snp } # This function is used to match the Genotype and phenotype files for the analysis. selection <- function(idfile, matchfile, outfile){ ids <- read.table(idfile, header = T) nr <- dim(ids)[1] for ( i in 1:nr) { a <- ids[i,1] b <- ids[i,2] system(paste("awk ' ($1==" , a , "&& $2==", b, ") {print $0}'", matchfile , ">>", outfile) ) } } # File to cut up SNPs file into smaller, usable file snpFile <- function(input, output, start, end){ system(paste("cut -d ' ' -f", paste(start, "-", end, sep=""), input, ">", output) ) } # Mike's Function to identify the various ordinal/binary items in the analysis countOrdinals <- function (data) { if (!is.data.frame(data)) {stop("Data must be a dataframe to intelligently discern ordinal from continuous variables")} nvar <- dim(data)[[2]] isord <- vector(mode="logical",nvar) nameList <- names(data) ordnameList <- vector(mode="character",nvar) contnameList <- vector(mode="character",nvar) nthresh <- vector(mode="integer",nvar) varNs <- vector(mode="integer",nvar) correlationLabels <- matrix(NA,nrow=nvar,ncol=nvar) nordinal <- 0 ncontinuous <- 0 for (i in 1:nvar) { varNs[i] <- sum(!is.na(data[,i])) if (is.factor(data[,i])) { nordinal <- nordinal + 1 nthresh[i] <- length(table(data[,i]))-1 ordnameList[nordinal] <- nameList[i] isord[i] <- TRUE } else { ncontinuous <- ncontinuous + 1 nthresh[i] <- 0 contnameList[ncontinuous] <- nameList[i] isord[i] <- FALSE } for (k in 1:nvar) { if (i > k) { correlationLabels[i,k] <- paste("r",i,k) correlationLabels[k,i] <- paste("r",i,k) } } } maxnthresh <- max(nthresh) return(list(ncontinuous=ncontinuous, nordinal=nordinal, contnameList=contnameList, ordnameList=ordnameList, isord=isord, nthresh=nthresh, maxnthresh= maxnthresh, correlationLabels=correlationLabels, varNs=varNs)) } ## This function conducts all of the pairwise correlations between the variables provided facCov <- function(dataset, VarNames = colnames(dataset), covariates = NULL , contin = FALSE, maxCor = .99){ UseVars <- c(VarNames, covariates) dataset <- as.data.frame(subset(dataset, select = UseVars )) ordtest <- as.data.frame(t(apply(dataset, 2, function(x) length(unique(x))))) ordtest[,contin] <- ordtest[,contin]+20 for (ord in 1:dim(dataset)[2]) { if (ordtest[ord] < 11) dataset[,ord] <- mxFactor((dataset[,ord]),levels=c(min(dataset[,ord], na.rm = T ):max(dataset[,ord], na.rm = T ))) else dataset[ord] <- dataset[ord] } ords<- countOrdinals(as.data.frame(dataset)) data <- subset(dataset, select = UseVars) pair <- combn(colnames(dataset), 2) nVars <- length(colnames(dataset)) mat<- apply(pair, 2, function(x){ dat <- subset(dataset, select = x) # dat <- subset(dataset, select = pair[,41]) selVars <- colnames(dat) ORDS <- countOrdinals(as.data.frame(dat)) conVars <- ORDS$contnameList[1:ORDS$ncontinuous] ordVars <- ORDS$ordnameList[1:ORDS$nordinal] pairNs <- sum(complete.cases(dat)) varNs <- ORDS$varNs Dat <- mxData(observed=dat, type="raw") # Set up the Covariance and Mean Matrices covFree <- matrix(T, 2, 2) covFree[1,1] <- ORDS$isord[1] != T covFree[2,2] <- ORDS$isord[2] != T covs <- mxMatrix("Symm", 2, 2, values=diag(2), free=covFree, labels = c("r11", "r21", "r22"), ubound = ifelse(ORDS$nordinal==2, maxCor, maxCor), name="covs") Mean <- mxMatrix("Full", 1, 2, free = ifelse(ORDS$isord == T, F, T), values = 0, labels = c("mu1", "mu2"), name="Mean") # Set up the Threshold Matrix if (ORDS$nordinal ==1) ifelse(ORDS$nthresh[1] > 0, ThrVals <- matrix(c(seq(0, ORDS$nthresh[1]-1), rep(NA, ORDS$maxnthresh - ORDS$nthresh[1])), ORDS$maxnthresh, 1), ThrVals <- matrix(c(seq(0, ORDS$nthresh[2]-1), rep(NA, ORDS$maxnthresh - ORDS$nthresh[2])), ORDS$maxnthresh, 1)) if (ORDS$nordinal ==2) ThrVals <- apply(matrix(ORDS$nthresh, 1,2), 2, function(x) c(seq(0, x-1), rep(NA, ORDS$maxnthresh - x))) if (ORDS$nordinal ==1 ) ifelse(ORDS$nthresh[1] > 0, ThrLabs <- paste("v1_th",1:ORDS$maxnthresh,sep=""), ThrLabs <- paste("v2_th",1:ORDS$maxnthresh,sep="")) if (ORDS$nordinal ==2 ) ThrLabs <- c(paste("v1_th",1:ORDS$maxnthresh,sep=""),paste("v2_th",1:ORDS$maxnthresh,sep="")) if (ORDS$nordinal > 0) { if (max(ORDS$nthresh) >0 ) {Ups <- mxMatrix("Full", ORDS$maxnthresh, ORDS$nordinal, free = !is.na(ThrVals), values = ThrVals, labels = ThrLabs, name = "Ups") }} expFunCC <- mxExpectationNormal(covariance="covs", means="Mean", dimnames=selVars) expFunCO <- mxExpectationNormal(covariance="covs", means="Mean", thresholds = "Ups", threshnames = ordVars, dimnames = selVars) fitFunction <- mxFitFunctionML() if (sum(ORDS$isord)<1) obj <- expFunCC else obj <- expFunCO if (sum(ORDS$isord)<1) pars <- c(Dat, covs, Mean) else pars <- c(Dat, covs, Mean, Ups )#, increment , Thresh) corModel <- mxModel("cor", pars, obj, fitFunction) corFit <- mxRun(corModel, unsafe = F, silent = T) if(corFit$output$status$code > 1) corFit <- mxRun(corFit, unsafe = F, silent = T) if(corFit$output$status$code > 1) corFit <- mxRun(corFit, unsafe = F, silent = T) if(corFit$output$status$code > 1) corFit <- mxRun(corFit, unsafe = F, silent = T) if(corFit$output$status$code > 1) corFit <- mxRun(corFit, unsafe = F, silent = T) #summary(corFit) est <- as.data.frame(t(corFit@output$estimate)) se <- as.data.frame(t(corFit@output$standardErrors)) colnames(se) <- names(corFit@output$estimate) c(v1N = varNs[1], v2N = varNs[2], pairNs = pairNs, var1 = ifelse(is.null(est$r11), NA, est$r11), cov21 = ifelse(is.null(est$r21), NA, est$r21), var2 = ifelse(is.null(est$r22), NA, est$r22), mu1 = ifelse(is.null(est$mu1), NA, est$mu1), mu2 = ifelse(is.null(est$mu2), NA, est$mu2), v1_th1 = ifelse(is.null(est$v1_th1), NA, est$v1_th1), v1_th2 = ifelse(is.null(est$v1_th2), NA, est$v1_th2), v1_th3 = ifelse(is.null(est$v1_th3), NA, est$v1_th3), v1_th4 = ifelse(is.null(est$v1_th4), NA, est$v1_th4), v1_th5 = ifelse(is.null(est$v1_th5), NA, est$v1_th5), v1_th6 = ifelse(is.null(est$v1_th6), NA, est$v1_th6), v1_th7 = ifelse(is.null(est$v1_th7), NA, est$v1_th7), v1_th8 = ifelse(is.null(est$v1_th8), NA, est$v1_th8), v1_th9 = ifelse(is.null(est$v1_th9), NA, est$v1_th9), v2_th1 = ifelse(is.null(est$v2_th1), NA, est$v2_th1), v2_th2 = ifelse(is.null(est$v2_th2), NA, est$v2_th2), v2_th3 = ifelse(is.null(est$v2_th3), NA, est$v2_th3), v2_th4 = ifelse(is.null(est$v2_th4), NA, est$v2_th4), v2_th5 = ifelse(is.null(est$v2_th5), NA, est$v2_th5), v2_th6 = ifelse(is.null(est$v2_th6), NA, est$v2_th6), v2_th7 = ifelse(is.null(est$v2_th7), NA, est$v2_th7), v2_th8 = ifelse(is.null(est$v2_th8), NA, est$v2_th8), v2_th9 = ifelse(is.null(est$v2_th9), NA, est$v2_th9), var1SE = ifelse(is.null(se$r11), NA, se$r11), covSE = ifelse(is.null(se$r21), NA, se$r21), var2SE = ifelse(is.null(se$r22), NA, se$r22), mu1SE = ifelse(is.null(se$mu1), NA, se$mu1), mu2SE = ifelse(is.null(se$mu2), NA, se$mu2), SEv1_th1 = ifelse(is.null(se$v1_th1), NA, se$v1_th1), SEv1_th2 = ifelse(is.null(se$v1_th2), NA, se$v1_th2), SEv1_th3 = ifelse(is.null(se$v1_th3), NA, se$v1_th3), SEv1_th4 = ifelse(is.null(se$v1_th4), NA, se$v1_th4), SEv1_th5 = ifelse(is.null(se$v1_th5), NA, se$v1_th5), SEv1_th6 = ifelse(is.null(se$v1_th6), NA, se$v1_th6), SEv1_th7 = ifelse(is.null(se$v1_th7), NA, se$v1_th7), SEv1_th8 = ifelse(is.null(se$v1_th8), NA, se$v1_th8), SEv1_th9 = ifelse(is.null(se$v1_th9), NA, se$v1_th9), SEv2_th1 = ifelse(is.null(se$v2_th1), NA, se$v2_th1), SEv2_th2 = ifelse(is.null(se$v2_th2), NA, se$v2_th2), SEv2_th3 = ifelse(is.null(se$v2_th3), NA, se$v2_th3), SEv2_th4 = ifelse(is.null(se$v2_th4), NA, se$v2_th4), SEv2_th5 = ifelse(is.null(se$v2_th5), NA, se$v2_th5), SEv2_th6 = ifelse(is.null(se$v2_th6), NA, se$v2_th6), SEv2_th7 = ifelse(is.null(se$v2_th7), NA, se$v2_th7), SEv2_th8 = ifelse(is.null(se$v2_th8), NA, se$v2_th8), SEv2_th9 = ifelse(is.null(se$v2_th9), NA, se$v2_th9), status = corFit$output$status$code) }) # end of the correlation calculations # Now format the output results <- as.data.frame(t(mat)) results$covWeights <- sqrt(results$pairNs-1)/results$covSE results$var1Weights <- sqrt(results$pairNs-1)/results$var1SE results$var2Weights <- sqrt(results$pairNs-1)/results$var2SE results$mu1Weights <- sqrt(results$pairNs-1)/results$mu1SE results$mu2Weights <- sqrt(results$pairNs-1)/results$mu2SE results$v1t1Weights <- sqrt(results$v1N-1)/results$SEv1_th1 results$v1t2Weights <- sqrt(results$v1N-1)/results$SEv1_th2 results$v1t3Weights <- sqrt(results$v1N-1)/results$SEv1_th3 results$v1t4Weights <- sqrt(results$v1N-1)/results$SEv1_th4 results$v1t5Weights <- sqrt(results$v1N-1)/results$SEv1_th5 results$v1t6Weights <- sqrt(results$v1N-1)/results$SEv1_th6 results$v1t7Weights <- sqrt(results$v1N-1)/results$SEv1_th7 results$v1t8Weights <- sqrt(results$v1N-1)/results$SEv1_th8 results$v1t9Weights <- sqrt(results$v1N-1)/results$SEv1_th9 results$v2t1Weights <- sqrt(results$v2N-1)/results$SEv2_th1 results$v2t2Weights <- sqrt(results$v2N-1)/results$SEv2_th2 results$v2t3Weights <- sqrt(results$v2N-1)/results$SEv2_th3 results$v2t4Weights <- sqrt(results$v2N-1)/results$SEv2_th4 results$v2t5Weights <- sqrt(results$v2N-1)/results$SEv2_th5 results$v2t6Weights <- sqrt(results$v2N-1)/results$SEv2_th6 results$v2t7Weights <- sqrt(results$v2N-1)/results$SEv2_th7 results$v2t8Weights <- sqrt(results$v2N-1)/results$SEv2_th8 results$v2t9Weights <- sqrt(results$v2N-1)/results$SEv2_th9 results$v1name <- t(pair)[,1] results$v2name <- t(pair)[,2] sel1 <- c("v1N","var1","var1SE","var1Weights","mu1", "mu1SE", "mu1Weights", "v1_th1", "v1_th2", "v1_th3", "v1_th4", "v1_th5", "v1_th6", "v1_th7", "v1_th8", "v1_th9", "SEv1_th1", "SEv1_th2", "SEv1_th3","SEv1_th4", "SEv1_th5", "SEv1_th6", "SEv1_th7", "SEv1_th8", "SEv1_th9", "v1t1Weights", "v1t2Weights", "v1t3Weights", "v1t4Weights", "v1t5Weights","v1t6Weights","v1t7Weights","v1t8Weights","v1t9Weights", "v1name") sel2 <- c("v2N", "var2","var2SE","var2Weights","mu2", "mu2SE", "mu2Weights", "v2_th1", "v2_th2", "v2_th3", "v2_th4", "v2_th5", "v2_th6", "v2_th7", "v2_th8", "v2_th9", "SEv2_th1", "SEv2_th2", "SEv2_th3", "SEv2_th4","SEv2_th5","SEv2_th6","SEv2_th7","SEv2_th8","SEv2_th9", "v2t1Weights", "v2t2Weights", "v2t3Weights", "v2t4Weights", "v2t5Weights","v2t6Weights","v2t7Weights","v2t8Weights","v2t9Weights","v2name") sels <- c("N", "var","varSE","varW","mu", "muSE", "muW", "th1", "th2", "th3", "th4","th5","th6","th7","th8","th9", "SE_th1", "SE_th2", "SE_th3", "SE_th4", "SE_th5","SE_th6","SE_th7","SE_th8","SE_th9", "t1Weights", "t2Weights", "t3Weights", "t4Weights", "t5Weights", "t6Weights", "t7Weights", "t8Weights", "t9Weights", "name") sum.stats <- matrix(NA , nVars, 34, dimnames = list(UseVars, sels[1:34])) for(i in 1:nVars){ x <- subset(results, results$v1name == UseVars[i], sel1); colnames(x) <- sels y <- subset(results, results$v2name == UseVars[i], sel2); colnames(y) <- sels set <- rbind(x,y) for(j in 1:34){ sum.stats[i,j] <- mean(set[,j], trim = .1, na.rm = T) } } sum.stats <- as.data.frame(sum.stats) covariances <- results$cov21 variance <- sum.stats$var varW <- ifelse(is.nan(sum.stats$varW), 0, sum.stats$varW) mu <- t(as.matrix(ifelse(is.nan(sum.stats$mu), 0,sum.stats$mu))); colnames(mu) <- UseVars muW <- t(as.matrix(ifelse(is.nan(sum.stats$muW), 0,sum.stats$muW))); colnames(muW) <- UseVars Thr1 <- sum.stats$th1 Thr2 <- sum.stats$th2 Thr3 <- sum.stats$th3 Thr4 <- sum.stats$th4 Thr5 <- sum.stats$th5 Thr6 <- sum.stats$th6 Thr7 <- sum.stats$th7 Thr8 <- sum.stats$th8 Thr9 <- sum.stats$th9 THRESH <- as.data.frame(rbind(Thr1, Thr2, Thr3, Thr4, Thr5, Thr6, Thr7, Thr8, Thr9))[1:ords$maxnthresh,] covSEs <- results$covSE Thr1SEs <- sum.stats$SE_th1 Thr2SEs <- sum.stats$SE_th2 Thr3SEs <- sum.stats$SE_th3 Thr4SEs <- sum.stats$SE_th4 Thr5SEs <- sum.stats$SE_th5 Thr6SEs <- sum.stats$SE_th6 Thr7SEs <- sum.stats$SE_th7 Thr8SEs <- sum.stats$SE_th8 Thr9SEs <- sum.stats$SE_th9 colnames(THRESH) <- UseVars THRESH <- THRESH[,ords$isord] THRESHse <- as.data.frame(rbind( Thr1SEs, Thr2SEs, Thr3SEs, Thr4SEs, Thr5SEs, Thr6SEs, Thr7SEs, Thr8SEs, Thr9SEs))[1:ords$maxnthresh,] covWs <- results$covWeights Thr1Ws <- sum.stats$t1Weights Thr2Ws <- sum.stats$t2Weights Thr3Ws <- sum.stats$t3Weights Thr4Ws <- sum.stats$t4Weights Thr5Ws <- sum.stats$t5Weights Thr6Ws <- sum.stats$t6Weights Thr7Ws <- sum.stats$t7Weights Thr8Ws <- sum.stats$t8Weights Thr9Ws <- sum.stats$t9Weights colnames(THRESHse) <- UseVars THRESHse <- THRESHse[,ords$isord] THRESHw <- as.data.frame(rbind(Thr1Ws, Thr2Ws, Thr3Ws, Thr4Ws, Thr5Ws, Thr6Ws, Thr7Ws, Thr8Ws, Thr9Ws))[1:ords$maxnthresh,] colnames(THRESHw) <- UseVars THRESHw <- THRESHw[,ords$isord] Covs <- matrix(0, nVars, nVars) Covs[lower.tri(Covs)] <- covariances Cov <- Covs + t(Covs) + diag(variance) dimnames(Cov) <- list(UseVars, UseVars) for(k in 1:dim(Cov)[1]){ifelse(is.nan(Cov[k,k]),Cov[k,k]<- 1, Cov[k,k]<- Cov[k,k])} CovWei <- matrix(0, nVars, nVars) CovWei[lower.tri(CovWei)] <- covWs CovWeis <- CovWei + t(CovWei) + diag(varW) dimnames(CovWeis) <- list(UseVars, UseVars) return(list(covMatrix = Cov, covWeights = CovWeis, meanMatrix = mu, meanWeights = muW, thresholds = THRESH, thrWeights = THRESHw, nThresh = ords$nthresh, results = results)) } # This is the facCov function where the means and variances are freed for the ordinal varibles, and the first two factor loadings are fixed at zero and 1. facCov01 <- function(dataset, VarNames = colnames(dataset), covariates = NULL , contin = FALSE, maxCor = 3){ UseVars <- c(VarNames, covariates) dataset <- as.data.frame(subset(dataset, select = UseVars )) ordtest <- as.data.frame(t(apply(dataset, 2, function(x) length(unique(x))))) ordtest[,contin] <- ordtest[,contin]+20 for (ord in 1:dim(dataset)[2]) { if (ordtest[ord] < 11) dataset[,ord] <- mxFactor((dataset[,ord]),levels=c(min(dataset[,ord], na.rm = T ):max(dataset[,ord], na.rm = T ))) else dataset[ord] <- dataset[ord] } ords<- countOrdinals(as.data.frame(dataset)) data <- subset(dataset, select = UseVars) pair <- combn(colnames(dataset), 2) nVars <- length(colnames(dataset)) mat<- apply(pair, 2, function(x){ dat <- subset(dataset, select = x) # dat <- subset(dataset, select = pair[,1]) selVars <- colnames(dat) ORDS <- countOrdinals(as.data.frame(dat)) conVars <- ORDS$contnameList[1:ORDS$ncontinuous] ordVars <- ORDS$ordnameList[1:ORDS$nordinal] pairNs <- sum(complete.cases(dat)) varNs <- ORDS$varNs Dat <- mxData(observed=dat, type="raw") # Set up the Covariance and Mean Matrices covFree <- matrix(T, 2, 2) covFree[1,1] <- ORDS$nthresh[1] != 1 covFree[2,2] <- ORDS$nthresh[2] != 1 covs <- mxMatrix("Symm", 2, 2, values=diag(2), free=covFree, labels = c("r11", "r21", "r22"), ubound = ifelse(ORDS$nordinal==2, maxCor, 20), name="covs") # Mean <- mxMatrix("Full", 1, 2, free = ifelse(ORDS$isord == T, F, T), values = 0, labels = c("mu1", "mu2"), name="Mean") Mean <- mxMatrix("Full", 1, 2, free = T, values = 0, labels = c("mu1", "mu2"), name="Mean") # Set up the Threshold Matrix if (ORDS$nordinal ==1) ifelse(ORDS$nthresh[1] > 0, ThrVals <- matrix(c(seq(0, ORDS$nthresh[1]-1), rep(NA, ORDS$maxnthresh - ORDS$nthresh[1])), ORDS$maxnthresh, 1), ThrVals <- matrix(c(seq(0, ORDS$nthresh[2]-1), rep(NA, ORDS$maxnthresh - ORDS$nthresh[2])), ORDS$maxnthresh, 1)) if (ORDS$nordinal ==2) ThrVals <- apply(matrix(ORDS$nthresh, 1,2), 2, function(x) c(seq(0, x-1), rep(NA, ORDS$maxnthresh - x))) if (ORDS$nordinal ==1 ) ifelse(ORDS$nthresh[1] > 0, ThrLabs <- paste("v1_th",1:ORDS$maxnthresh,sep=""), ThrLabs <- paste("v2_th",1:ORDS$maxnthresh,sep="")) if (ORDS$nordinal ==2 ) ThrLabs <- c(paste("v1_th",1:ORDS$maxnthresh,sep=""),paste("v2_th",1:ORDS$maxnthresh,sep="")) if (ORDS$maxnthresh ==1 ) THFR <- F if (ORDS$maxnthresh ==2 ) THFR <- matrix(c(F,F), 2, 1) if (ORDS$maxnthresh >2 ) THFR <- matrix(c(F, F, rep(T, ORDS$maxnthresh-2)), ORDS$maxnthresh, 1) if (ORDS$nordinal > 0) { if (max(ORDS$nthresh) >0 ) {Ups <- mxMatrix("Full", ORDS$maxnthresh, ORDS$nordinal, free = THFR, values = ThrVals, labels = ThrLabs, name = "Ups") }} expFunCC <- mxExpectationNormal(covariance="covs", means="Mean", dimnames=selVars) expFunCO <- mxExpectationNormal(covariance="covs", means="Mean", thresholds = "Ups", threshnames = ordVars, dimnames = selVars) fitFunction <- mxFitFunctionML() if (sum(ORDS$isord)<1) obj <- expFunCC else obj <- expFunCO if (sum(ORDS$isord)<1) pars <- c(Dat, covs, Mean) else pars <- c(Dat, covs, Mean, Ups )#, increment , Thresh) corModel <- mxModel("cor", pars, obj, fitFunction) corFit <- mxRun(corModel, unsafe = F, silent = T) if(corFit$output$status$code > 1) corFit <- mxRun(corFit, unsafe = F, silent = T) est <- as.data.frame(t(corFit@output$estimate)) se <- as.data.frame(t(corFit@output$standardErrors)) colnames(se) <- names(corFit@output$estimate) c(v1N = varNs[1], v2N = varNs[2], pairNs = pairNs, var1 = ifelse(is.null(est$r11), NA, est$r11), cov21 = ifelse(is.null(est$r21), NA, est$r21), var2 = ifelse(is.null(est$r22), NA, est$r22), mu1 = ifelse(is.null(est$mu1), NA, est$mu1), mu2 = ifelse(is.null(est$mu2), NA, est$mu2), v1_th1 = ifelse(is.null(est$v1_th1), NA, est$v1_th1), v1_th2 = ifelse(is.null(est$v1_th2), NA, est$v1_th2), v1_th3 = ifelse(is.null(est$v1_th3), NA, est$v1_th3), v1_th4 = ifelse(is.null(est$v1_th4), NA, est$v1_th4), v1_th5 = ifelse(is.null(est$v1_th5), NA, est$v1_th5), v1_th6 = ifelse(is.null(est$v1_th6), NA, est$v1_th6), v1_th7 = ifelse(is.null(est$v1_th7), NA, est$v1_th7), v1_th8 = ifelse(is.null(est$v1_th8), NA, est$v1_th8), v1_th9 = ifelse(is.null(est$v1_th9), NA, est$v1_th9), v2_th1 = ifelse(is.null(est$v2_th1), NA, est$v2_th1), v2_th2 = ifelse(is.null(est$v2_th2), NA, est$v2_th2), v2_th3 = ifelse(is.null(est$v2_th3), NA, est$v2_th3), v2_th4 = ifelse(is.null(est$v2_th4), NA, est$v2_th4), v2_th5 = ifelse(is.null(est$v2_th5), NA, est$v2_th5), v2_th6 = ifelse(is.null(est$v2_th6), NA, est$v2_th6), v2_th7 = ifelse(is.null(est$v2_th7), NA, est$v2_th7), v2_th8 = ifelse(is.null(est$v2_th8), NA, est$v2_th8), v2_th9 = ifelse(is.null(est$v2_th9), NA, est$v2_th9), var1SE = ifelse(is.null(se$r11), NA, se$r11), covSE = ifelse(is.null(se$r21), NA, se$r21), var2SE = ifelse(is.null(se$r22), NA, se$r22), mu1SE = ifelse(is.null(se$mu1), NA, se$mu1), mu2SE = ifelse(is.null(se$mu2), NA, se$mu2), SEv1_th1 = ifelse(is.null(se$v1_th1), NA, se$v1_th1), SEv1_th2 = ifelse(is.null(se$v1_th2), NA, se$v1_th2), SEv1_th3 = ifelse(is.null(se$v1_th3), NA, se$v1_th3), SEv1_th4 = ifelse(is.null(se$v1_th4), NA, se$v1_th4), SEv1_th5 = ifelse(is.null(se$v1_th5), NA, se$v1_th5), SEv1_th6 = ifelse(is.null(se$v1_th6), NA, se$v1_th6), SEv1_th7 = ifelse(is.null(se$v1_th7), NA, se$v1_th7), SEv1_th8 = ifelse(is.null(se$v1_th8), NA, se$v1_th8), SEv1_th9 = ifelse(is.null(se$v1_th9), NA, se$v1_th9), SEv2_th1 = ifelse(is.null(se$v2_th1), NA, se$v2_th1), SEv2_th2 = ifelse(is.null(se$v2_th2), NA, se$v2_th2), SEv2_th3 = ifelse(is.null(se$v2_th3), NA, se$v2_th3), SEv2_th4 = ifelse(is.null(se$v2_th4), NA, se$v2_th4), SEv2_th5 = ifelse(is.null(se$v2_th5), NA, se$v2_th5), SEv2_th6 = ifelse(is.null(se$v2_th6), NA, se$v2_th6), SEv2_th7 = ifelse(is.null(se$v2_th7), NA, se$v2_th7), SEv2_th8 = ifelse(is.null(se$v2_th8), NA, se$v2_th8), SEv2_th9 = ifelse(is.null(se$v2_th9), NA, se$v2_th9), status = corFit$output$status$code) }) # end of the correlation calculations # Now format the output results <- as.data.frame(t(mat)) results$covWeights <- sqrt(results$pairNs-1)/results$covSE results$var1Weights <- sqrt(results$pairNs-1)/results$var1SE results$var2Weights <- sqrt(results$pairNs-1)/results$var2SE results$mu1Weights <- sqrt(results$pairNs-1)/results$mu1SE results$mu2Weights <- sqrt(results$pairNs-1)/results$mu2SE results$v1t1Weights <- sqrt(results$v1N-1)/results$SEv1_th1 results$v1t2Weights <- sqrt(results$v1N-1)/results$SEv1_th2 results$v1t3Weights <- sqrt(results$v1N-1)/results$SEv1_th3 results$v1t4Weights <- sqrt(results$v1N-1)/results$SEv1_th4 results$v1t5Weights <- sqrt(results$v1N-1)/results$SEv1_th5 results$v1t6Weights <- sqrt(results$v1N-1)/results$SEv1_th6 results$v1t7Weights <- sqrt(results$v1N-1)/results$SEv1_th7 results$v1t8Weights <- sqrt(results$v1N-1)/results$SEv1_th8 results$v1t9Weights <- sqrt(results$v1N-1)/results$SEv1_th9 results$v2t1Weights <- sqrt(results$v2N-1)/results$SEv2_th1 results$v2t2Weights <- sqrt(results$v2N-1)/results$SEv2_th2 results$v2t3Weights <- sqrt(results$v2N-1)/results$SEv2_th3 results$v2t4Weights <- sqrt(results$v2N-1)/results$SEv2_th4 results$v2t5Weights <- sqrt(results$v2N-1)/results$SEv2_th5 results$v2t6Weights <- sqrt(results$v2N-1)/results$SEv2_th6 results$v2t7Weights <- sqrt(results$v2N-1)/results$SEv2_th7 results$v2t8Weights <- sqrt(results$v2N-1)/results$SEv2_th8 results$v2t9Weights <- sqrt(results$v2N-1)/results$SEv2_th9 results$v1name <- t(pair)[,1] results$v2name <- t(pair)[,2] sel1 <- c("v1N","var1","var1SE","var1Weights","mu1", "mu1SE", "mu1Weights", "v1_th1", "v1_th2", "v1_th3", "v1_th4", "v1_th5", "v1_th6", "v1_th7", "v1_th8", "v1_th9", "SEv1_th1", "SEv1_th2", "SEv1_th3","SEv1_th4", "SEv1_th5", "SEv1_th6", "SEv1_th7", "SEv1_th8", "SEv1_th9", "v1t1Weights", "v1t2Weights", "v1t3Weights", "v1t4Weights", "v1t5Weights","v1t6Weights","v1t7Weights","v1t8Weights","v1t9Weights", "v1name") sel2 <- c("v2N", "var2","var2SE","var2Weights","mu2", "mu2SE", "mu2Weights", "v2_th1", "v2_th2", "v2_th3", "v2_th4", "v2_th5", "v2_th6", "v2_th7", "v2_th8", "v2_th9", "SEv2_th1", "SEv2_th2", "SEv2_th3", "SEv2_th4","SEv2_th5","SEv2_th6","SEv2_th7","SEv2_th8","SEv2_th9", "v2t1Weights", "v2t2Weights", "v2t3Weights", "v2t4Weights", "v2t5Weights","v2t6Weights","v2t7Weights","v2t8Weights","v2t9Weights","v2name") sels <- c("N", "var","varSE","varW","mu", "muSE", "muW", "th1", "th2", "th3", "th4","th5","th6","th7","th8","th9", "SE_th1", "SE_th2", "SE_th3", "SE_th4", "SE_th5","SE_th6","SE_th7","SE_th8","SE_th9", "t1Weights", "t2Weights", "t3Weights", "t4Weights", "t5Weights", "t6Weights", "t7Weights", "t8Weights", "t9Weights", "name") sum.stats <- matrix(NA , nVars, 34, dimnames = list(UseVars, sels[1:34])) for(i in 1:nVars){ x <- subset(results, results$v1name == UseVars[i], sel1); colnames(x) <- sels y <- subset(results, results$v2name == UseVars[i], sel2); colnames(y) <- sels set <- rbind(x,y) for(j in 1:34){ sum.stats[i,j] <- mean(set[,j], trim = .1, na.rm = T) } } sum.stats <- as.data.frame(sum.stats) covariances <- results$cov21 variance <- sum.stats$var varW <- ifelse(is.nan(sum.stats$varW), 0, sum.stats$varW) mu <- t(as.matrix(ifelse(is.nan(sum.stats$mu), 0,sum.stats$mu))); colnames(mu) <- UseVars muW <- t(as.matrix(ifelse(is.nan(sum.stats$muW), 0,sum.stats$muW))); colnames(muW) <- UseVars Thr1 <- sum.stats$th1 Thr2 <- sum.stats$th2 Thr3 <- sum.stats$th3 Thr4 <- sum.stats$th4 Thr5 <- sum.stats$th5 Thr6 <- sum.stats$th6 Thr7 <- sum.stats$th7 Thr8 <- sum.stats$th8 Thr9 <- sum.stats$th9 THRESH <- as.data.frame(rbind(Thr1, Thr2, Thr3, Thr4, Thr5, Thr6, Thr7, Thr8, Thr9))[1:ords$maxnthresh,] covSEs <- results$covSE Thr1SEs <- sum.stats$SE_th1 Thr2SEs <- sum.stats$SE_th2 Thr3SEs <- sum.stats$SE_th3 Thr4SEs <- sum.stats$SE_th4 Thr5SEs <- sum.stats$SE_th5 Thr6SEs <- sum.stats$SE_th6 Thr7SEs <- sum.stats$SE_th7 Thr8SEs <- sum.stats$SE_th8 Thr9SEs <- sum.stats$SE_th9 colnames(THRESH) <- UseVars THRESH <- THRESH[,ords$isord] THRESHse <- as.data.frame(rbind( Thr1SEs, Thr2SEs, Thr3SEs, Thr4SEs, Thr5SEs, Thr6SEs, Thr7SEs, Thr8SEs, Thr9SEs))[1:ords$maxnthresh,] covWs <- results$covWeights Thr1Ws <- sum.stats$t1Weights Thr2Ws <- sum.stats$t2Weights Thr3Ws <- sum.stats$t3Weights Thr4Ws <- sum.stats$t4Weights Thr5Ws <- sum.stats$t5Weights Thr6Ws <- sum.stats$t6Weights Thr7Ws <- sum.stats$t7Weights Thr8Ws <- sum.stats$t8Weights Thr9Ws <- sum.stats$t9Weights colnames(THRESHse) <- UseVars THRESHse <- THRESHse[,ords$isord] THRESHw <- as.data.frame(rbind(Thr1Ws, Thr2Ws, Thr3Ws, Thr4Ws, Thr5Ws, Thr6Ws, Thr7Ws, Thr8Ws, Thr9Ws))[1:ords$maxnthresh,] colnames(THRESHw) <- UseVars THRESHw <- THRESHw[,ords$isord] Covs <- matrix(0, nVars, nVars) Covs[lower.tri(Covs)] <- covariances Cov <- Covs + t(Covs) + diag(variance) dimnames(Cov) <- list(UseVars, UseVars) for(k in 1:dim(Cov)[1]){ifelse(is.nan(Cov[k,k]),Cov[k,k]<- 1, Cov[k,k]<- Cov[k,k])} CovWei <- matrix(0, nVars, nVars) CovWei[lower.tri(CovWei)] <- covWs CovWeis <- CovWei + t(CovWei) + diag(varW) dimnames(CovWeis) <- list(UseVars, UseVars) return(list(covMatrix = Cov, covWeights = CovWeis, meanMatrix = mu, meanWeights = muW, thresholds = THRESH, thrWeights = THRESHw, nThresh = ords$nthresh, results = results)) } # Estimate the covariance between the items and the snps (0, 1, 2) - used by other functions vecCors <- function(manData, snpData, FacCovFit){ ordtest <- apply(manData, 2, function(x) length(unique(x))) for (ord in 1:dim(manData)[2]) { if (ordtest[ord] < 11) manData[ord] <- mxFactor((manData[,ord]),levels=c(min(manData[ord], na.rm = T ):max(manData[ord], na.rm = T ))) else manData[ord] <- manData[ord] } ords<- countOrdinals(as.data.frame(manData)) data <- manData thresholds <- FacCovFit$thresholds Mus <- FacCovFit$meanMatrix VARS <- diag(FacCovFit$covMatrix) snpCovs <- matrix(NA, dim(snpData)[2], length(colnames(data)), dimnames = list(colnames(snpData), colnames(manData))) COVse <- matrix(NA, dim(snpData)[2], length(colnames(data)), dimnames = list(colnames(snpData), colnames(manData))) CovTs <- matrix(NA, dim(snpData)[2], length(colnames(data)), dimnames = list(colnames(snpData), colnames(manData))) pairNs <- matrix(NA, dim(snpData)[2], length(colnames(data)), dimnames = list(colnames(snpData), colnames(manData))) snpNs <- matrix(NA, dim(snpData)[2], 1, dimnames = list(colnames(snpData), "NS")) snpMus <- matrix(NA, dim(snpData)[2], 1, dimnames = list(colnames(snpData), "Mus")) snpVar <- matrix(NA, dim(snpData)[2], 1, dimnames = list(colnames(snpData), "Vars")) for(i in 1:length(colnames(manData))){ for(j in 1:length(colnames(snpData))){ dat1 <- data.frame(snpData[,j]) colnames(dat1) <- colnames(snpData[j]) dat <- data.frame(manData[,i], dat1) colnames(dat) <- c(colnames(manData[i]), colnames(snpData[j])) dat <- dat[complete.cases(dat),] selVars <- colnames(dat) ORDS <- countOrdinals(as.data.frame(dat)) ordVars <- ORDS$ordnameList[1:ORDS$nordinal] conVars <- ORDS$contnameList[1:ORDS$ncontinuous] ordVars <- ORDS$ordnameList[1:ORDS$nordinal] Dat <- mxData(observed=dat, type="raw") COVS <- mxMatrix( type="Symm", nrow=2, ncol=2, free=c(F, T, F), values= diag(c(VARS[selVars[1]], var(dat1))), label=c("v1", "c1", "v2"), name="COVS" ) if (ords$nthresh[i]>0) thresh <- mxMatrix( type="Full", nrow=ords$nthresh[1], ncol=1, free=F, values= thresholds[,colnames(manData[i])][1:ords$nthresh[i]], labels=paste("t", 1:ords$nthresh[i], sep = ""), name="thresh" ) if (ords$nthresh[i]>0) thresh <- mxMatrix( type="Full", nrow=ords$nthresh[1], ncol=1, free=F, values= thresholds[,colnames(manData[i])][1:ords$nthresh[i]], labels=paste("t", 1:ords$nthresh[i], sep = ""), name="thresh" ) Mean <- mxMatrix(type="Full", nrow=1, ncol=2, free=F, values= c(Mus[,selVars[1]], mean(dat[,2])), label=c("Mu1", "Mu2"), name="Mean") expFunCC <- mxExpectationNormal(covariance="COVS", means="Mean", dimnames = selVars) expFunCO <- mxExpectationNormal(covariance="COVS", means="Mean", thresholds = "thresh", dimnames = selVars, threshnames = ordVars) fitFunction <- mxFitFunctionML() if (sum(ORDS$isord)==0) obj <- expFunCC else obj <- expFunCO if (sum(ORDS$isord)<1) pars <- c(Dat, COVS, Mean, fitFunction) else pars <- c(Dat, COVS, Mean, thresh, fitFunction) corModel <- mxModel("cor", pars, obj) corFit <- mxRun(corModel, unsafe = T, silent = T) summary(corFit) est <- as.data.frame(t(corFit@output$estimate)) ses <- as.data.frame(t(corFit@output$standardErrors)) ; colnames(ses) <- names(corFit@output$estimate) p <- mean(dat[,2])/2 sx <- 2*p*(1-p) sy <- ifelse(!is.null(est$v1), sqrt(est$v1), 1) snpCovs[j,i] <- est$c1 COVse[j,i] <- ifelse(!is.null(ses$c1), ses$c1, 1) pairNs[j,i] <- sum(complete.cases(dat)) snpNs[j,] <- sum(!is.na(dat[,2])) snpMus[j,] <- 2 * p snpVar[j,] <- sx } } snpMse <- sqrt(snpVar)/sqrt(snpNs) snpVarSE <- snpVar * sqrt(2/(snpNs-1)) COVw <- sqrt(pairNs-1)/COVse snpMw <- sqrt(snpNs-1)/snpMse snpVw <- sqrt(snpNs-1)/snpVarSE return(list(covVector = snpCovs, covSEs = COVse, covWeights = COVw, vars = snpVar, varSEs = snpVarSE, varWs = snpVw, means = snpMus, meanSEs = snpMse, meanWs = snpMw)) } # This is the vecCors Function where the means and variances are freed for the ordinal varibles, and the first two factor loadings are fixed at zero and 1. vecCors01 <- function(manData, snpData, FacCovFit){ ordtest <- apply(manData, 2, function(x) length(unique(x))) for (ord in 1:dim(manData)[2]) { if (ordtest[ord] < 11) manData[ord] <- mxFactor((manData[,ord]),levels=c(min(manData[ord], na.rm = T ):max(manData[ord], na.rm = T ))) else manData[ord] <- manData[ord] } ords<- countOrdinals(as.data.frame(manData)) data <- manData thresholds <- FacCovFit$thresholds Mus <- FacCovFit$meanMatrix VARS <- diag(FacCovFit$covMatrix) if (ords$maxnthresh>0) thresholds[1,] <- 0 if (ords$maxnthresh>1) thresholds[2,] <- 1 snpCovs <- matrix(NA, dim(snpData)[2], length(colnames(data)), dimnames = list(colnames(snpData), colnames(manData))) COVse <- matrix(NA, dim(snpData)[2], length(colnames(data)), dimnames = list(colnames(snpData), colnames(manData))) CovTs <- matrix(NA, dim(snpData)[2], length(colnames(data)), dimnames = list(colnames(snpData), colnames(manData))) pairNs <- matrix(NA, dim(snpData)[2], length(colnames(data)), dimnames = list(colnames(snpData), colnames(manData))) snpNs <- matrix(NA, dim(snpData)[2], 1, dimnames = list(colnames(snpData), "NS")) snpMus <- matrix(NA, dim(snpData)[2], 1, dimnames = list(colnames(snpData), "Mus")) snpVar <- matrix(NA, dim(snpData)[2], 1, dimnames = list(colnames(snpData), "Vars")) for(i in 1:length(colnames(manData))){ for(j in 1:length(colnames(snpData))){ dat1 <- data.frame(snpData[,j]) colnames(dat1) <- colnames(snpData[j]) dat <- data.frame(manData[,i], dat1) colnames(dat) <- c(colnames(manData[i]), colnames(snpData[j])) dat <- dat[complete.cases(dat),] selVars <- colnames(dat) ORDS <- countOrdinals(as.data.frame(dat)) ordVars <- ORDS$ordnameList[1:ORDS$nordinal] conVars <- ORDS$contnameList[1:ORDS$ncontinuous] ordVars <- ORDS$ordnameList[1:ORDS$nordinal] Dat <- mxData(observed=dat, type="raw") COVS <- mxMatrix( type="Symm", nrow=2, ncol=2, free=c(F, T, F), values= diag(c(VARS[selVars[1]], var(dat1))), label=c("v1", "c1", "v2"), name="COVS" ) if (ords$nthresh[i]>0) thresh <- mxMatrix( type="Full", nrow=ords$nthresh[1], ncol=1, free=F, values= thresholds[,colnames(manData[i])][1:ords$nthresh[i]], labels=paste("t", 1:ords$nthresh[i], sep = ""), name="thresh" ) if (ords$nthresh[i]>0) thresh <- mxMatrix( type="Full", nrow=ords$nthresh[1], ncol=1, free=F, values= thresholds[,colnames(manData[i])][1:ords$nthresh[i]], labels=paste("t", 1:ords$nthresh[i], sep = ""), name="thresh" ) Mean <- mxMatrix(type="Full", nrow=1, ncol=2, free=F, values= c(Mus[,selVars[1]], mean(dat[,2])), label=c("Mu1", "Mu2"), name="Mean") expFunCC <- mxExpectationNormal(covariance="COVS", means="Mean", dimnames = selVars) expFunCO <- mxExpectationNormal(covariance="COVS", means="Mean", thresholds = "thresh", dimnames = selVars, threshnames = ordVars) fitFunction <- mxFitFunctionML() if (sum(ORDS$isord)==0) obj <- expFunCC else obj <- expFunCO if (sum(ORDS$isord)<1) pars <- c(Dat, COVS, Mean, fitFunction) else pars <- c(Dat, COVS, Mean, thresh, fitFunction) corModel <- mxModel("cor", pars, obj) corFit <- mxRun(corModel, unsafe = T, silent = T) #summary(corFit) est <- as.data.frame(t(corFit@output$estimate)) ses <- as.data.frame(t(corFit@output$standardErrors)) ; colnames(ses) <- names(corFit@output$estimate) p <- mean(dat[,2])/2 sx <- 2*p*(1-p) sy <- ifelse(!is.null(est$v1), sqrt(est$v1), 1) snpCovs[j,i] <- est$c1 COVse[j,i] <- ifelse(!is.null(ses$c1), ses$c1, 1) pairNs[j,i] <- sum(complete.cases(dat)) snpNs[j,] <- sum(!is.na(dat[,2])) snpMus[j,] <- 2 * p snpVar[j,] <- sx } } snpMse <- sqrt(snpVar)/sqrt(snpNs) snpVarSE <- snpVar * sqrt(2/(snpNs-1)) COVw <- sqrt(pairNs-1)/COVse snpMw <- sqrt(snpNs-1)/snpMse snpVw <- sqrt(snpNs-1)/snpVarSE return(list(covVector = snpCovs, covSEs = COVse, covWeights = COVw, vars = snpVar, varSEs = snpVarSE, varWs = snpVw, means = snpMus, meanSEs = snpMse, meanWs = snpMw)) } # This function basically conducts a GWAS for each phenotype (item and covariate) # Loop function to calculate the snp-item correlations for 0, 1, 2 genotypes snpCovs <- function(FacModelData, vars, covariates = NULL, SNPdata, output = "SNP", zeroOne = FALSE, runs = NA, inc = 1000 , start = 1){ facDataFull <- read.table(FacModelData, header = T) facData <- subset(facDataFull, select = c(vars, covariates)) ifelse(zeroOne, FacModelFit <- facCov01(facData), FacModelFit <- facCov(facData)) FacVars <- colnames(FacModelFit$covMatrix) facData <- subset(read.table(FacModelData, header = T), select = FacVars) nSNP <- as.numeric(system(paste("sed -n '2'p", SNPdata, "| wc -w"), intern = T)) # Number of SNPs st <- seq(start, nSNP, by=inc) fi <- seq(start+inc, nSNP, by=inc)-1 if( fi[length(fi)] != nSNP) fi <- c(fi,nSNP) if(is.na(runs)) runs<- length(fi) #write.table( "Getting Data from the File", "getting.txt", append = F ) #write.table( "Running the Analysis", "anal.txt", append = F ) for(rs in 1:runs){ print(rs) print(Sys.time()) stBin <- st[rs] fiBin <- fi[rs] #startOFData <- Sys.time() system(paste("cut -d ' ' -f", paste(stBin, "-", fiBin, sep=""), SNPdata, "> tmp.txt") ) snp <- read.table("tmp.txt", header = T) #endOFData <- Sys.time() ifelse(zeroOne, vecOFcovs <- vecCors01(facData, snp, FacModelFit), vecOFcovs <- vecCors(facData, snp, FacModelFit)) #endOFanal <- Sys.time() #write.table( endOFData-startOFData, "getting.txt", append = T ) #write.table(endOFanal-endOFData, "anal.txt", append = T ) covOut <- as.matrix(cbind(vecOFcovs$covVector, vecOFcovs$vars, vecOFcovs$means)) covSE <- as.matrix(cbind(vecOFcovs$covSEs, vecOFcovs$varSEs, vecOFcovs$meanSEs)) COVweights <- as.matrix(cbind(vecOFcovs$covWeights, vecOFcovs$varWs, vecOFcovs$meanWs)) colnames(COVweights) <- colnames(covOut) if (stBin ==1) write.table(covOut, paste(output, "cov.txt", sep = "")) else write.table(covOut,paste(output, "cov.txt", sep = ""), append = T, col.names = F) if (stBin ==1) write.table(covSE, paste(output, "se.txt", sep = "")) else write.table(covSE, paste(output, "se.txt", sep = ""), append = T, col.names = F) if (stBin ==1) write.table(COVweights, paste(output, "wei.txt", sep = "")) else write.table(COVweights, paste(output, "wei.txt", sep = ""), append = T, col.names = F) } } # This function conducts the GWAS of One Factor using DWLS gwasDWLS <- function(itemData, snpCov, snpWei, VarNames = colnames(dataset), covariates = NULL, runs = NA, output = "DWLSout.txt", inc = 1000){ DATA <- subset(read.table(itemData, header = T), select = c(VarNames, covariates)) FacModelFit <- facCov(DATA) ObsCov <- FacModelFit$covMatrix ObsMean <- FacModelFit$meanMatrix CovWei <- FacModelFit$covWeights MeanWei <- FacModelFit$meanWeights FacVars <- colnames (ObsCov) nSNP <- as.numeric(system(paste("wc -l <", snpCov), intern = T))-1 # Number of SNPs selVars <- colnames(ObsCov) st <- seq(1, nSNP, by=inc) fi <- seq(inc, nSNP, by=inc) if( fi[length(fi)] != nSNP) fi[length(fi)+1] <- nSNP if(is.na(runs)) runs<- length(fi) nItems <- length(c(VarNames)) nFac <- 1 nCovariates <- length(covariates) a1 <- matrix(0, nItems+nCovariates+nFac, nItems+nCovariates+nFac) dimnames(a1) <- list(c(VarNames,covariates, "Fac"),c(VarNames,covariates, "Fac")) a1[VarNames, "Fac"] <- .5 a1[VarNames,covariates] <- .1 alab1 <- matrix("Null", nItems+nCovariates+nFac, nItems+nCovariates+nFac) dimnames(alab1) <- list(c(VarNames,covariates, "Fac"),c(VarNames,covariates, "Fac")) alab1[VarNames, "Fac"]<- paste("l" , 1:length(VarNames), sep = "") alab1[VarNames,covariates] <- paste(rep(covariates, each= nItems), 1:nItems, sep = "_") aLB1 <- matrix(-2, nItems+nCovariates+nFac, nItems+nCovariates+nFac) dimnames(aLB1) <- list(c(VarNames,covariates, "Fac"),c(VarNames,covariates, "Fac")) aLB1[VarNames[1],"Fac"] <- .01 obsstat <- c(FacModelFit$covMatrix[lower.tri(FacModelFit$covMatrix, diag = T)] , FacModelFit$meanMatrix) wei <- c(FacModelFit$covWeights[lower.tri(FacModelFit$covWeights, diag = T)] , FacModelFit$meanWeights) #obsstat <- c(FacModelFit$covMatrix[lower.tri(FacModelFit$covMatrix, diag = T)] , FacModelFit$meanMatrix, unlist(c(FacModelFit$thresholds))) #wei <- c(FacModelFit$covWeights[lower.tri(FacModelFit$covWeights, diag = T)] , FacModelFit$meanWeights, unlist(c(FacModelFit$thrWeights))) if(is.null(covariates)) SlabsP <- c(paste("var", 1:(nItems), sep = ""), "Fvar") if(!is.null(covariates)) SlabsP <- c(paste("var", 1:(nItems), sep = ""), paste("con", 1:(nCovariates), sep = ""), "Fvar") if(is.null(covariates)) MlabsP <- paste("muItem", 1:(nItems), sep = "") if(!is.null(covariates)) MlabsP <- c(paste("muItem", 1:(nItems), sep = ""), paste("muCon", 1:(nCovariates), sep = "")) A <- mxMatrix(type = "Full", values = a1, nrow = (nItems+nCovariates+1), ncol = (nItems+nCovariates+1), free = a1!=0, labels = alab1, lbound = aLB1, name = "A") S <- mxMatrix(type = "Diag", values = 1, nrow = (nItems+nCovariates+1), ncol = (nItems+nCovariates+1), free = c(FacModelFit$nThresh==0, F) , labels = SlabsP, name = "S") # not very general II <- mxMatrix(type = "Iden", nrow = (nItems+nCovariates+1), ncol = (nItems+nCovariates+1), name = "II") FF <- mxMatrix(type = "Full", values = cbind(diag(nItems+nCovariates), matrix(0, nItems+nCovariates, nFac)), free = F, name = "FF") expectedCov <- mxAlgebra( FF %&% (solve(II - A)%&% (S) ), "expectedCov") expectedMean <- mxMatrix("Full", values = 0, nrow = 1, ncol = (nItems+nCovariates), free = c(FacModelFit$nThresh==0), labels = MlabsP, name = "Mean") vec <- mxAlgebra(expression= rbind(cbind(vech(expectedCov)), cbind(t(Mean))), name="vec") # Expected Parameter Vector obsvec <- mxMatrix("Full", length(obsstat), 1, values= obsstat, name="obsvec") # Observed Parameter Vector weights <- mxMatrix("Full", length(wei), 1, values=wei, name="weights") # Weights for DWLS dwls <- mxAlgebra(sum( (vec-obsvec) * weights *(vec-obsvec)), name="dwls") # Algebra for the Objective Function DWLSobj <- mxFitFunctionAlgebra(algebra = "dwls") # Objective Function for DWLS FactorModel <- mxModel("DWLS", A, S, II, FF, expectedCov, expectedMean, vec, obsvec, weights, dwls, DWLSobj) FactorPre <- mxRun(FactorModel, unsafe = T, silent = T) #summary(FactorPre) pre <- matrix(c(NA , NA, NA, NA, length(FactorPre@output$estimate), FactorPre@output$status$code, NA, NA), 1, 8) colnames(pre) <- c("snp", "snpSE", "t-statistic", "p-value", "df", "status", "MuSnp", "VarSnp") # pre <- matrix(c(NA, NA , NA,NA, NA , NA, FactorPre@output$minimum, length(FactorPre@output$estimate), FactorPre@output$status$code), 1, 10) # colnames(pre) <- c("snp", "SNPvar", "SNPmean", "snpSE", "SNPvarSE", "SNPmeanSE", "minimum", "df", "status") write.table(pre,output, append = F, row.names = "Null") for(rs in 1:runs){ print(rs) stBin <- st[rs] +1 fiBin <- fi[rs] +1 system(paste("awk '{print $0}'", snpCov, "| head -1 > covTMP.txt ")) system(paste("sed -n '", paste(stBin, ",", fiBin, sep=""), "p'", snpCov, ">> covTMP.txt")) system(paste("awk '{print $0}'", snpWei, "| head -1 > weiTMP.txt ")) system(paste("sed -n '", paste(stBin, ",", fiBin, sep=""), "p'", snpWei, ">> weiTMP.txt")) WEIS <- read.table("weiTMP.txt") COVS <- read.table("covTMP.txt") COVS <- COVS[rowSums(is.na(WEIS))<3,] WEIS <- WEIS[rowSums(is.na(WEIS))<3,] WEIS[is.na(WEIS)] <- .1 snp2facDWLS <- matrix(NA, dim(WEIS)[1], 6) estimates <- matrix(NA, dim(WEIS)[1], length(FactorPre@output$estimate)+3) for(fit in 1:dim(WEIS)[1]){ a <- matrix(0, nItems+nCovariates+nFac+1, nItems+nCovariates+nFac+1) dimnames(a) <- list(c(VarNames,covariates, "snp","Fac"),c(VarNames,covariates, "snp","Fac")) a[VarNames, "Fac"] <- .5 a[VarNames, covariates] <- .1 a["Fac","snp"] <- .1 alab <- matrix("Null", nItems+nCovariates+nFac+1, nItems+nCovariates+nFac+1) dimnames(alab) <- list(c(VarNames,covariates, "snp","Fac"),c(VarNames,covariates, "snp","Fac")) alab[VarNames, "Fac"]<- paste("l" , 1:nItems, sep = "") alab[VarNames, covariates] <- paste(rep(covariates, each= nItems), 1:nItems, sep = "_") alab["Fac","snp"] <- "snp" aLB <- matrix(-2, nItems+nCovariates+nFac+1, nItems+nCovariates+nFac+1) dimnames(aLB) <- list(c(VarNames,covariates, "snp","Fac"),c(VarNames,covariates, "snp","Fac")) aLB[VarNames[1],"Fac"] <- .01 Cov <- cbind(rbind(ObsCov, COVS[fit, 1:(nItems+nCovariates)]), t(COVS[fit, 1:(nItems+nCovariates+1)])) Wei <- cbind(rbind(CovWei, WEIS[fit, 1:(nItems+nCovariates)]), t(WEIS[fit, 1:(nItems+nCovariates+1)])) Means <- c(ObsMean, COVS[fit, nItems+2]) MeansWeight <- c(MeanWei, WEIS[fit, nItems+2]) obsstat <- c(Cov[lower.tri(Cov, diag = T)] , Means) wei <- c(Wei[lower.tri(Wei, diag = T)] , MeansWeight) if(is.null(covariates)) Slabs <- c(paste("var", 1:(nItems), sep = ""), "SNPvar", "Fvar") if(!is.null(covariates)) Slabs <- c(paste("var", 1:(nItems), sep = ""), paste("con", 1:(nCovariates), sep = ""), "SNPvar", "Fvar") if(is.null(covariates)) Mlabs <- c(paste("mean", VarNames, sep = "_"), "SNPmean") if(!is.null(covariates)) Mlabs <- c(paste("mean", VarNames, sep = "_"), paste("mean", covariates, sep = "_"), "SNPmean") A <- mxMatrix(type = "Full", values = a, nrow = nItems+nCovariates+nFac+1, ncol = nItems+nCovariates+nFac+1, free = a!=0, labels = alab, lbound = aLB, name = "A") S <- mxMatrix(type = "Diag", values = 1, nrow = nItems+nCovariates+nFac+1, ncol = nItems+nCovariates+nFac+1, free = c(FacModelFit$nThresh==0, T, F) , labels = Slabs , name = "S") II <- mxMatrix(type = "Iden", nrow = nItems+nCovariates+nFac+1, ncol = nItems+nCovariates+nFac+1, name = "II") FF <- mxMatrix(type = "Full", values = cbind(diag(nItems+nCovariates+1), matrix(0, nItems+nCovariates+1, nFac)), free = F, name = "FF") expectedCov <- mxAlgebra( FF%&% (solve(II - A)%&% (S )), "expectedCov") expectedMean <- mxMatrix("Full", values = Means, nrow = 1, ncol = nItems+nCovariates+1, free = c(FacModelFit$nThresh==0,T), labels = c(paste("mean", VarNames, sep = "_"), paste("mean", covariates, sep = "_"), "SNPmean"), name = "Mean") vec <- mxAlgebra(expression= rbind(cbind(vech(expectedCov)), cbind(t(Mean))), name="vec") # Expected Parameter Vector obsvec <- mxMatrix("Full", length(obsstat), 1, values= obsstat, name="obsvec") # Observed Parameter Vector weights <- mxMatrix("Full", length(wei), 1, values=wei, name="weights") # Weights for DWLS dwls <- mxAlgebra(sum( (vec-obsvec) * weights *(vec-obsvec)), name="dwls") # Algebra for the Objective Function DWLSobj <- mxFitFunctionAlgebra(algebra = "dwls") ordModelDWLS <- mxModel("DWLS", A, S, II, FF, expectedCov, expectedMean, vec, obsvec, weights, dwls, DWLSobj) ordFitDWLS <- mxRun(ordModelDWLS, unsafe = T, silent = T) #ordFitDWLSnull <- omxSetParameters(ordFitDWLS, labels = "snp", free = F, name = "null") #ordFitDWLSnull <- mxRun(ordFitDWLSnull, unsafe = T, silent = T ) params <- as.vector(ordFitDWLS@output$estimate[c("snp")]) SES <- as.vector(t(ordFitDWLS@output$standardErrors)[,c("snp")]) #snp2facDWLS[fit,] <- c(params, SES, ordFitDWLS@output$minimum, ordFitDWLSnull@output$minimum,length(ordFitDWLS@output$estimate), ordFitDWLS@output$status$code) snp2facDWLS[fit,] <- c(params, SES, params/SES, 2*pnorm(-abs(params/SES)), length(ordFitDWLS@output$estimate), ordFitDWLS@output$status$code) estimates[fit,] <- ordFitDWLS@output$estimate colnames(estimates) <- names(ordFitDWLS@output$estimate) } # close fit rownames(snp2facDWLS) <- rownames(COVS) snp2facDWLS <- cbind(snp2facDWLS, COVS$Mus, COVS$Vars) write.table(snp2facDWLS,output, append = T, col.names = F, row.names = T) rownames(estimates) <- rownames(COVS) if(rs==1) {write.table(estimates,"estimates.txt", append = F, col.names = T, row.names = T)} else{ write.table(estimates,"estimates.txt", append = T, col.names = F, row.names = T)} } # close rs } # This function conducts the the GWAS on the residuals of the factor model items resDWLS <- function(itemData, snpCov, snpWei, VarNames = colnames(dataset), covariates = NULL, resids = VarNames, factor = F, runs = NA, output = "DWLSres.txt", inc = 1000){ if(length(resids)+ sum(factor)>length(VarNames)) stop("The Model is not Identified if you have a regression on the latent factor and ALL of the residuals") nVars <- length(VarNames) nFac <- 1 nCovariates <- length(covariates) nGen <- 1 ntv <- nVars+nCovariates+nGen+nFac dims <- c(VarNames, covariates, "SNP", "Fac") predims <- c(VarNames, covariates, "Fac") DATA <- subset(read.table(itemData, header = T), select = c(VarNames,covariates)) FacModelFit <- facCov(DATA) ObsCov <- FacModelFit$covMatrix ObsMean <- FacModelFit$meanMatrix CovWei <- FacModelFit$covWeights MeanWei <- FacModelFit$meanWeights FacVars <- colnames (ObsCov) nSNP <- as.numeric(system(paste("wc -l <", snpCov), intern = T))-1 # Number of SNPs nItems <- length(c(VarNames)) selVars <- colnames(ObsCov) st <- seq(1, nSNP, by=inc) fi <- seq(inc, nSNP, by=inc) if( fi[length(fi)] != nSNP) fi[length(fi)+1] <- nSNP if(is.na(runs)) runs<- length(fi) a1 <- matrix(0, ntv-nGen, ntv-nGen, dimnames = list(predims, predims)) a1[VarNames,"Fac" ] <- .5 a1[VarNames,covariates ] <- .1 alab1 <- matrix("Null", ntv-nGen, ntv-nGen, dimnames = list(predims, predims)) alab1[VarNames,"Fac"] <- paste("l" , 1:nVars, sep = "") alab1[VarNames,covariates] <- paste(rep(covariates, each= nItems), 1:nItems, sep = "_") aLB1 <- matrix(-2, nItems+nCovariates+nFac, nItems+nCovariates+nFac) dimnames(aLB1) <- list(c(VarNames,covariates, "Fac"),c(VarNames,covariates, "Fac")) aLB1[VarNames[1],"Fac"] <- .01 obsstat <- c(FacModelFit$covMatrix[lower.tri(FacModelFit$covMatrix, diag = T)] , FacModelFit$meanMatrix) wei <- c(FacModelFit$covWeights[lower.tri(FacModelFit$covWeights, diag = T)] , FacModelFit$meanWeights) A <- mxMatrix(type = "Full", values = a1, nrow = (nItems+nCovariates+1), ncol = (nItems+nCovariates+1), free = a1!=0, labels = alab1, lbound = aLB1, name = "A") S <- mxMatrix(type = "Diag", values = 1, nrow = (nItems+nCovariates+1), ncol = (nItems+nCovariates+1), free = c(FacModelFit$nThresh==0, F) , labels = c(paste("var", 1:(nItems), sep = ""), paste("con", 1:(nCovariates), sep = ""), "Fvar") , name = "S") # not very general II <- mxMatrix(type = "Iden", nrow = (nItems+nCovariates+1), ncol = (nItems+nCovariates+1), name = "II") FF <- mxMatrix(type = "Full", values = cbind(diag(nItems+nCovariates), matrix(0, nItems+nCovariates, nFac)), free = F, name = "FF") expectedCov <- mxAlgebra( FF %&% (solve(II - A)%&% (S) ), "expectedCov") expectedMean <- mxMatrix("Full", values = 0, nrow = 1, ncol = (nItems+nCovariates), free = c(FacModelFit$nThresh==0), labels = c(paste("muItem", 1:nItems, sep = ""),paste("muCon", 1:nCovariates, sep = "")), name = "Mean") vec <- mxAlgebra(expression= rbind(cbind(vech(expectedCov)), cbind(t(Mean))), name="vec") # Expected Parameter Vector obsvec <- mxMatrix("Full", length(obsstat), 1, values= obsstat, name="obsvec") # Observed Parameter Vector weights <- mxMatrix("Full", length(wei), 1, values=wei, name="weights") # Weights for DWLS dwls <- mxAlgebra(sum( (vec-obsvec) * weights *(vec-obsvec)), name="dwls") # Algebra for the Objective Function DWLSobj <- mxFitFunctionAlgebra(algebra = "dwls") # Objective Function for DWLS FactorModel <- mxModel("DWLS", A, S, II, FF, expectedCov, expectedMean, vec, obsvec, weights, dwls, DWLSobj) FactorPre <- mxRun(FactorModel, unsafe = T, silent = T) #summary(FactorPre) #lds <- omxGetParameters(FactorPre)[paste("l" , 1:nVars, sep = "")] #rss <- omxGetParameters(FactorPre)[paste("var", predims[1:length(predims)-1], sep = "")] #mus <- omxGetParameters(FactorPre)["facMean"] if (factor) {pre <- matrix(c(NA, NA, rep(NA, length(resids)*4 ), length(FactorPre@output$estimate), FactorPre@output$status$code, NA,NA), 1, (length(resids)*4 + 4 + 2))} else { pre <- matrix(c(rep(NA, length(resids)*4 ), length(FactorPre@output$estimate), FactorPre@output$status$code, NA,NA), 1, (length(resids)*4 + 4)) } if (factor) {colnames(pre) <- c("snp2fac", paste("snp", resids, sep="_"), "SE_fac", paste("SE", resids, sep="_"), "t_fac", paste("t", resids, sep="_"), "p_fac", paste("p", resids, sep="_"), "df", "status", "SnpMu", "SnpVar")} else {colnames(pre) <- c(paste("snp", resids, sep="_"), paste("SE", resids, sep="_"), paste("t", resids, sep="_"),paste("p", resids, sep="_"), "df", "status", "SnpMu", "SnpVar")} # pre <- matrix(c(NA, NA , NA,NA, NA , NA, FactorPre@output$minimum, length(FactorPre@output$estimate), FactorPre@output$status$code), 1, 10) # colnames(pre) <- c("snp", "SNPvar", "SNPmean", "snpSE", "SNPvarSE", "SNPmeanSE", "minimum", "df", "status") write.table(pre,output, append = F, row.names = "Null") for(rs in 1:runs){ print(rs) stBin <- st[rs] +1 fiBin <- fi[rs] +1 system(paste("awk '{print $0}'", snpCov, "| head -1 > covTMP.txt ")) system(paste("sed -n '", paste(stBin, ",", fiBin, sep=""), "p'", snpCov, ">> covTMP.txt")) system(paste("awk '{print $0}'", snpWei, "| head -1 > weiTMP.txt ")) system(paste("sed -n '", paste(stBin, ",", fiBin, sep=""), "p'", snpWei, ">> weiTMP.txt")) WEIS <- read.table("weiTMP.txt") COVS <- read.table("covTMP.txt") COVS <- COVS[rowSums(is.na(WEIS))<3,] WEIS <- WEIS[rowSums(is.na(WEIS))<3,] WEIS[is.na(WEIS)] <- .1 if (factor) {snp2facDWLS <- matrix(NA, dim(WEIS)[1], (length(resids)*4 + 2 + 2))} else {snp2facDWLS <- matrix(NA, dim(WEIS)[1], (length(resids)*4 + 2))} estimates <- matrix(NA, dim(WEIS)[1], length(FactorPre@output$estimate)+ length(resids) + sum(factor)+ 2) for(fit in 1:dim(WEIS)[1]){ a <- matrix(0, ntv, ntv, dimnames = list(dims, dims)) a[VarNames,"Fac" ] <- .5 a[VarNames,covariates ] <- .1 a[resids,"SNP"] <- .01 if (factor) a["Fac","SNP"] <- .01 alab <- matrix("Null", ntv, ntv, dimnames = list(dims, dims)) alab[VarNames,"Fac" ] <- paste("l" , 1:nVars, sep = "") alab[VarNames,covariates ] <- paste(covariates, VarNames, sep = "_") alab[resids,"SNP"] <- paste("snp", resids, sep = "_") alab[VarNames,covariates] <- paste(rep(covariates, each= nItems), 1:nItems, sep = "_") if (factor) alab["Fac","SNP"] <- "snp2fac" aLB <- matrix(-2, nItems+nCovariates+nFac+1, nItems+nCovariates+nFac+1) dimnames(aLB) <- list(c(VarNames,covariates, "snp","Fac"),c(VarNames,covariates, "snp","Fac")) aLB[VarNames[1],"Fac"] <- .01 genSnpCov <- matrix(COVS[fit, c(VarNames,covariates)], 1, (nVars + nCovariates), dimnames = list ( rownames(COVS[fit,]),colnames(ObsCov))) genCov <- matrix(COVS[fit, "Vars"], 1, 1, dimnames = list(rownames(genSnpCov),rownames(genSnpCov))) Cov <- cbind(rbind(ObsCov, genSnpCov), t(cbind(genSnpCov,genCov))) genSnpWei <- matrix(WEIS[fit, c(VarNames,covariates)], 1, (nVars + nCovariates), dimnames = list ( rownames(COVS[fit,]),colnames(ObsCov))) genWei <- matrix(WEIS[fit, "Vars"], 1, 1, dimnames = list(rownames(genSnpWei),rownames(genSnpWei))) Wei <- cbind(rbind(CovWei, genSnpWei), t(cbind(genSnpWei,genWei))) Means <- c(ObsMean, COVS[fit, c("Mus")]) MeansWeight <- c(MeanWei, WEIS[fit, c("Mus")]) obsstat <- c(unlist(Cov[lower.tri(Cov, diag = T)]), Means) wei <- c(unlist(Wei[lower.tri(Wei, diag = T)]) , MeansWeight) A <- mxMatrix(type = "Full", values = a, nrow = nItems+nCovariates+nFac+1, ncol = nItems+nCovariates+nFac+1, free = a!=0, labels = alab, lbound = aLB, name = "A") S <- mxMatrix(type = "Diag", values = 1, nrow = nItems+nCovariates+nFac+1, ncol = nItems+nCovariates+nFac+1, free = c(FacModelFit$nThresh==0, T, F) , labels = c(paste("var", 1:(nItems), sep = ""), paste("var", covariates, sep = "_"), "SNPvar", "FacVar") , name = "S") II <- mxMatrix(type = "Iden", nrow = nItems+nCovariates+nFac+1, ncol = nItems+nCovariates+nFac+1, name = "II") FF <- mxMatrix(type = "Full", values = cbind(diag(nItems+nCovariates+1), matrix(0, nItems+nCovariates+1, nFac)), free = F, name = "FF") expectedCov <- mxAlgebra( FF%&% (solve(II - A)%&% (S )), "expectedCov") expectedMean <- mxMatrix("Full", values = Means, nrow = 1, ncol = nItems+nCovariates+1, free = c(FacModelFit$nThresh==0,T), labels = c(paste("mean", VarNames, sep = "_"), paste("mean", covariates, sep = "_"), "SNPmean"), name = "Mean") vec <- mxAlgebra(expression= rbind(cbind(vech(expectedCov)), cbind(t(Mean))), name="vec") # Expected Parameter Vector obsvec <- mxMatrix("Full", length(obsstat), 1, values= obsstat, name="obsvec") # Observed Parameter Vector weights <- mxMatrix("Full", length(wei), 1, values=wei, name="weights") # Weights for DWLS dwls <- mxAlgebra(sum( (vec-obsvec) * weights *(vec-obsvec)), name="dwls") # Algebra for the Objective Function DWLSobj <- mxFitFunctionAlgebra(algebra = "dwls") ordModelDWLS <- mxModel("DWLS", A, S, II, FF, expectedCov, expectedMean, vec, obsvec, weights, dwls, DWLSobj) ordFitDWLS <- mxRun(ordModelDWLS, unsafe = T, silent = T) #summary(ordFitDWLS) if (factor) {params <- ordFitDWLS$output$estimate[c("snp2fac", paste("snp", resids, sep="_"))] } else {params <- ordFitDWLS$output$estimate[paste("snp", resids, sep="_")]} if (factor) {SES <- ordFitDWLS$output$standardErrors[c("snp2fac", paste("snp", resids, sep="_")),] } else {SES <- ordFitDWLS$output$standardErrors[paste("snp", resids, sep="_"),]} snp2facDWLS[fit,] <- c(params, SES, params/SES, 2*pnorm(-abs(params/SES)), length(ordFitDWLS@output$estimate), ordFitDWLS@output$status$code) estimates[fit,] <- ordFitDWLS@output$estimate colnames(estimates) <- names(ordFitDWLS@output$estimate) } # close fit rownames(snp2facDWLS) <- rownames(COVS) snp2facDWLS <- cbind(snp2facDWLS, COVS$Mus, COVS$Vars) write.table(snp2facDWLS,output, append = T, col.names = F, row.names = T) rownames(estimates) <- rownames(COVS) if(rs==1) {write.table(estimates,"resEstimates.txt", append = F, col.names = T, row.names = T)} else{ write.table(estimates,"resEstimates.txt", append = T, col.names = F, row.names = T)} } # close rs } # This function conducts the GWAS of One Factor using DWLS twofacDWLS <- function(itemData, snpCov, snpWei, f1Names, f2Names, covariates = NULL, runs = NA, output = "DWLSout.txt", inc = 1000){ DATA <- subset(read.table(itemData, header = T), select = c(f1Names, f2Names, covariates)) FacModelFit <- facCov(DATA) ObsCov <- FacModelFit$covMatrix ObsMean <- FacModelFit$meanMatrix CovWei <- FacModelFit$covWeights MeanWei <- FacModelFit$meanWeights FacVars <- colnames (ObsCov) nSNP <- as.numeric(system(paste("wc -l <", snpCov), intern = T))-1 # Number of SNPs selVars <- colnames(ObsCov) st <- seq(1, nSNP, by=inc) fi <- seq(inc, nSNP, by=inc) if( fi[length(fi)] != nSNP) fi[length(fi)+1] <- nSNP if(is.na(runs)) runs<- length(fi) nItems <- length(c(f1Names, f2Names)) nFac <- 2 nCovariates <- length(covariates) ntv <- nItems+nCovariates+nFac a1 <- matrix(0, ntv, ntv) dimnames(a1) <- list(c(f1Names, f2Names,covariates, "Fac1", "Fac2"),c(f1Names, f2Names,covariates, "Fac1", "Fac2")) a1[f1Names, "Fac1"] <- .5 a1[f2Names, "Fac2"] <- .5 a1[c(f1Names, f2Names), covariates] <- .1 alab1 <- matrix("Null", ntv, ntv) dimnames(alab1) <- list(c(f1Names, f2Names,covariates, "Fac1", "Fac2"),c(f1Names, f2Names,covariates, "Fac1", "Fac2")) alab1[f1Names, "Fac1"] <- paste("l1" , 1:length(f1Names), sep = "") alab1[f2Names, "Fac2"] <- paste("l2" , 1:length(f2Names), sep = "") alab1[c(f1Names, f2Names), covariates] <- paste(rep(covariates, each= nItems), 1:nItems, sep = "_") obsstat <- c(FacModelFit$covMatrix[lower.tri(FacModelFit$covMatrix, diag = T)] , FacModelFit$meanMatrix) wei <- c(FacModelFit$covWeights[lower.tri(FacModelFit$covWeights, diag = T)] , FacModelFit$meanWeights) #obsstat <- c(FacModelFit$covMatrix[lower.tri(FacModelFit$covMatrix, diag = T)] , FacModelFit$meanMatrix, unlist(c(FacModelFit$thresholds))) #wei <- c(FacModelFit$covWeights[lower.tri(FacModelFit$covWeights, diag = T)] , FacModelFit$meanWeights, unlist(c(FacModelFit$thrWeights))) s1 <- diag(ntv) dimnames(s1) <- list(c(f1Names, f2Names,covariates, "Fac1", "Fac2"),c(f1Names, f2Names,covariates, "Fac1", "Fac2")) sFree1 <- matrix(F, ntv, ntv, dimnames = list(c(f1Names, f2Names,covariates, "Fac1", "Fac2"),c(f1Names, f2Names,covariates, "Fac1", "Fac2"))) sFree1[s1!=0] <- c(FacModelFit$nThresh==0, F, F) sFree1["Fac1", "Fac2"]<- T sFree1["Fac2", "Fac1"]<- T slab1 <- matrix("Null", ntv, ntv, dimnames = list(c(f1Names, f2Names,covariates, "Fac1", "Fac2"),c(f1Names, f2Names,covariates, "Fac1", "Fac2"))) slab1[s1!=0] <- paste("var", c(f1Names, f2Names,covariates, "Fac1", "Fac2"), sep = "_") slab1["Fac1", "Fac2"]<- "cov12" slab1["Fac2", "Fac1"]<- "cov12" A <- mxMatrix(type = "Full", values = a1, nrow = ntv, ncol = ntv, free = a1!=0, labels = alab1, name = "A") S <- mxMatrix(type = "Full", values = s1, nrow = ntv, ncol = ntv, free = sFree1 , labels = slab1 , name = "S") # not very general II <- mxMatrix(type = "Iden", nrow = ntv, ncol = ntv, name = "II") FF <- mxMatrix(type = "Full", values = cbind(diag(ntv-nFac), matrix(0, ntv-nFac, nFac)), free = F, name = "FF") expectedCov <- mxAlgebra( FF %&% (solve(II - A)%&% (S) ), "expectedCov") expectedMean <- mxMatrix("Full", values = 0, nrow = 1, ncol = (nItems+nCovariates), free = c(FacModelFit$nThresh==0), labels = paste("mu", c(f1Names, f2Names,covariates), sep = ""), name = "Mean") vec <- mxAlgebra(expression= rbind(cbind(vech(expectedCov)), cbind(t(Mean))), name="vec") # Expected Parameter Vector obsvec <- mxMatrix("Full", length(obsstat), 1, values= obsstat, name="obsvec") # Observed Parameter Vector weights <- mxMatrix("Full", length(wei), 1, values=wei, name="weights") # Weights for DWLS dwls <- mxAlgebra(sum( (vec-obsvec) * weights *(vec-obsvec)), name="dwls") # Algebra for the Objective Function DWLSobj <- mxFitFunctionAlgebra(algebra = "dwls") # Objective Function for DWLS FactorModel <- mxModel("DWLS", A, S, II, FF, expectedCov, expectedMean, vec, obsvec, weights, dwls, DWLSobj) FactorPre <- mxRun(FactorModel, unsafe = T, silent = T) summary(FactorPre) # lds <- omxGetParameters(FactorPre)[1:nItems] # rss <- omxGetParameters(FactorPre)[(nItems+1):(2*nItems)] # mus <- omxGetParameters(FactorPre)[(2*nItems+1):(3*nItems)] pre <- matrix(c(NA , NA, NA , NA, NA , NA, NA , NA, length(FactorPre@output$estimate), FactorPre@output$status$code, NA, NA), 1, 12) colnames(pre) <- c("snp2fac1", "snp2fac2", "snp2fac1SE", "snp2fac2SE", "snp2fac1.t", "snp2fac2.t","snp2fac1.p", "snp2fac2.p", "df", "status", "SnpMu", "SnpVar") # pre <- matrix(c(NA, NA , NA,NA, NA , NA, FactorPre@output$minimum, length(FactorPre@output$estimate), FactorPre@output$status$code), 1, 10) # colnames(pre) <- c("snp", "SNPvar", "SNPmean", "snpSE", "SNPvarSE", "SNPmeanSE", "minimum", "df", "status") write.table(pre,output, append = F, row.names = "Null") for(rs in 1:runs){ print(rs) stBin <- st[rs] +1 fiBin <- fi[rs] +1 system(paste("awk '{print $0}'", snpCov, "| head -1 > covTMP.txt ")) system(paste("sed -n '", paste(stBin, ",", fiBin, sep=""), "p'", snpCov, ">> covTMP.txt")) system(paste("awk '{print $0}'", snpWei, "| head -1 > weiTMP.txt ")) system(paste("sed -n '", paste(stBin, ",", fiBin, sep=""), "p'", snpWei, ">> weiTMP.txt")) WEIS <- read.table("weiTMP.txt") COVS <- read.table("covTMP.txt") COVS <- COVS[rowSums(is.na(WEIS))<3,] WEIS <- WEIS[rowSums(is.na(WEIS))<3,] WEIS[is.na(WEIS)] <- .1 snp2facDWLS <- matrix(NA, dim(WEIS)[1], 10) estimates <- matrix(NA, dim(WEIS)[1], length(FactorPre@output$estimate)+4) for(fit in 1:dim(WEIS)[1]){ a <- matrix(0, ntv+1, ntv+1, dimnames = list(c(f1Names, f2Names,covariates, "snp", "Fac1", "Fac2"),c(f1Names, f2Names,covariates, "snp", "Fac1", "Fac2"))) a[f1Names, "Fac1"] <- .5 a[f2Names, "Fac2"] <- .5 a[c(f1Names, f2Names), covariates] <- .1 a["Fac1","snp"] <- .1 a["Fac2","snp"] <- .1 alab <- matrix("Null", ntv+1, ntv+1, dimnames = list(c(f1Names, f2Names,covariates, "snp", "Fac1", "Fac2"),c(f1Names, f2Names,covariates, "snp", "Fac1", "Fac2"))) alab[f1Names, "Fac1"] <- paste("l1" , 1:length(f1Names), sep = "") alab[f2Names, "Fac2"] <- paste("l2" , 1:length(f2Names), sep = "") alab[c(f1Names, f2Names), covariates] <- paste(rep(covariates, each= nItems), 1:nItems, sep = "_") alab["Fac1","snp"] <- "snp2fac1" alab["Fac2","snp"]<- "snp2fac2" s <- diag(ntv+1) dimnames(s) <- list(c(f1Names, f2Names,covariates, "snp", "Fac1", "Fac2"),c(f1Names, f2Names,covariates, "snp", "Fac1", "Fac2")) sFree <- matrix(F, ntv+1, ntv+1, dimnames = list(c(f1Names, f2Names,covariates, "snp", "Fac1", "Fac2"),c(f1Names, f2Names,covariates, "snp", "Fac1", "Fac2"))) sFree[s!=0] <- c(FacModelFit$nThresh==0, T, F, F) sFree["Fac1", "Fac2"]<- T sFree["Fac2", "Fac1"]<- T slab <- matrix("Null", ntv+1, ntv+1, dimnames = list(c(f1Names, f2Names,covariates, "snp", "Fac1", "Fac2"),c(f1Names, f2Names,covariates, "snp", "Fac1", "Fac2"))) slab[s!=0] <- paste("var", c(f1Names, f2Names,covariates, "snp", "Fac1", "Fac2"), sep = "_") slab["Fac1", "Fac2"]<- "cov12" slab["Fac2", "Fac1"]<- "cov12" Cov <- cbind(rbind(ObsCov, COVS[fit, 1:(nItems+nCovariates)]), t(COVS[fit, 1:(nItems+nCovariates+1)])) Wei <- cbind(rbind(CovWei, WEIS[fit, 1:(nItems+nCovariates)]), t(WEIS[fit, 1:(nItems+nCovariates+1)])) Means <- c(ObsMean, COVS[fit, nItems+2]) MeansWeight <- c(MeanWei, WEIS[fit, nItems+2]) obsstat <- c(Cov[lower.tri(Cov, diag = T)] , Means) wei <- c(Wei[lower.tri(Wei, diag = T)] , MeansWeight) A <- mxMatrix(type = "Full", values = a, nrow = nItems+nCovariates+nFac+1, ncol = nItems+nCovariates+nFac+1, free = a!=0, labels = alab, name = "A") S <- mxMatrix(type = "Full", values = s, nrow = ntv+1, ncol = ntv+1, free = sFree , labels = slab , name = "S") # not very general II <- mxMatrix(type = "Iden", nrow = ntv+1, ncol = ntv+1, name = "II") FF <- mxMatrix(type = "Full", values = cbind(diag(ntv-1), matrix(0, ntv-1, nFac)), free = F, name = "FF") expectedCov <- mxAlgebra( FF%&% (solve(II - A)%&% (S )), "expectedCov") expectedMean <- mxMatrix("Full", values = Means, nrow = 1, ncol = nItems+nCovariates+1, free = c(FacModelFit$nThresh==0,T), labels = c(paste("mean", c(f1Names, f2Names), sep = "_"), paste("mean", covariates, sep = "_"), "SNPmean"), name = "Mean") vec <- mxAlgebra(expression= rbind(cbind(vech(expectedCov)), cbind(t(Mean))), name="vec") # Expected Parameter Vector obsvec <- mxMatrix("Full", length(obsstat), 1, values= obsstat, name="obsvec") # Observed Parameter Vector weights <- mxMatrix("Full", length(wei), 1, values=wei, name="weights") # Weights for DWLS dwls <- mxAlgebra(sum( (vec-obsvec) * weights *(vec-obsvec)), name="dwls") # Algebra for the Objective Function DWLSobj <- mxFitFunctionAlgebra(algebra = "dwls") ordModelDWLS <- mxModel("DWLS", A, S, II, FF, expectedCov, expectedMean, vec, obsvec, weights, dwls, DWLSobj) ordFitDWLS <- mxRun(ordModelDWLS, unsafe = T, silent = T) params <- as.vector(ordFitDWLS@output$estimate[c("snp2fac1", "snp2fac2")]) SES <- as.vector(t(ordFitDWLS@output$standardErrors)[,c("snp2fac1","snp2fac2")]) snp2facDWLS[fit,] <- c(params, SES, params/SES, 2*pnorm(-abs(params/SES)), length(ordFitDWLS@output$estimate), ordFitDWLS@output$status$code) estimates[fit,] <- ordFitDWLS@output$estimate colnames(estimates) <- names(ordFitDWLS@output$estimate) } # close fit rownames(snp2facDWLS) <- rownames(COVS) snp2facDWLS <- cbind(snp2facDWLS, COVS$Mus, COVS$Vars) write.table(snp2facDWLS,output, append = T, col.names = F, row.names = T) rownames(estimates) <- rownames(COVS) if(rs==1) {write.table(estimates,"twoFacEstimates.txt", append = F, col.names = T, row.names = T)} else{ write.table(estimates,"twoFacEstimates.txt", append = T, col.names = F, row.names = T)} } # close rs } # This conducts the GWAS on the latent growth parameters using DWLS #require(OpenMx) growDWLS <- function(itemData, snpCov, snpWei, VarNames = colnames(dataset), covariates = NULL, runs = NA, output = "DWLSgrow.txt", inc = 1000, quadratic = T, orthogonal = T){ nVars <- length(VarNames) if (quadratic) nFac <- 3 else nFac <- 2 nCovariates <- length(covariates) nGen <- 1 ntv <- nVars+nCovariates+nGen+nFac dims <- c(VarNames, covariates, "SNP", if (quadratic) c("int", "lin", "quad") else c("int", "lin")) predims <- c(VarNames, covariates, if (quadratic) c("int", "lin", "quad") else c("int", "lin")) FacNames <- if (quadratic) c("int", "lin", "quad") else c("int", "lin") DATA <- subset(read.table(itemData, header = T), select = c(VarNames,covariates)) FacModelFit <- facCov01(DATA) ObsCov <- FacModelFit$covMatrix ObsMean <- FacModelFit$meanMatrix CovWei <- FacModelFit$covWeights MeanWei <- FacModelFit$meanWeights FacVars <- colnames (ObsCov) nSNP <- as.numeric(system(paste("wc -l <", snpCov), intern = T))-1 # Number of SNPs selVars <- colnames(ObsCov) st <- seq(1, nSNP, by=inc) fi <- seq(inc, nSNP, by=inc) if( fi[length(fi)] != nSNP) fi[length(fi)+1] <- nSNP if(is.na(runs)) runs<- length(fi) if (orthogonal) loadVals <- orthog(nVars)[,1:nFac] else loadVals <- matrix((c(rep(1,nVars),(0:(nVars-1)), (0:(nVars-1))^2)), nVars, nFac) a1 <- matrix(0, ntv-nGen, ntv-nGen, dimnames = list(predims, predims)) a1[1:nVars,(nVars+nCovariates+1):((nVars+nCovariates+nFac))] <- loadVals a1[VarNames,covariates]<- .1 alab1 <- matrix("NULL", ntv-nGen, ntv-nGen, dimnames = list(predims, predims)) alab1[1:nVars,"int"] <- paste("I_" , VarNames, sep = "") alab1[1:nVars,"lin"] <- paste("L_" , VarNames, sep = "") if (quadratic) alab1[1:nVars,"quad"] <- paste("Q_" , VarNames, sep = "") alab1[VarNames,covariates] <- paste(rep(covariates, nVars), VarNames, sep = "_") aFree1 <- matrix(F, ntv-nGen, ntv-nGen, dimnames = list(predims, predims)) aFree1[VarNames,covariates] <- T sVal1 <- diag(ntv-nGen) sVal1[(nVars+nCovariates+1):(nVars+nCovariates+nFac), (nVars+nCovariates+1):(nVars+nCovariates+nFac)] <- sVal1[(nVars+nCovariates+1):(nVars+nCovariates+nFac), (nVars+nCovariates+1):(nVars+nCovariates+nFac)] + .2 sFree1 <- (diag(c(diag(CovWei)!=0, diag(diag(nFac)==1))))==1 sFree1[(nVars+nCovariates+1):(nVars+nCovariates+nFac),(nVars+nCovariates+1):(nVars+nCovariates+nFac)] <- T sLab1 <- matrix("NULL", ntv-nGen, ntv-nGen, dimnames = list(predims, predims)) sLab1[sVal1==1][1:(nVars+nCovariates)]<- paste("v_", c(rep("res", nVars), covariates),sep = "") sLab1[(nVars+nCovariates+1):(nVars+nCovariates+nFac), (nVars+nCovariates+1):(nVars+nCovariates+nFac)]<- matrix(c("v_int", "i_l", "i_q", "i_l", "v_lin", "l_q", "i_q", "l_q", "v_quad"),3,3)[1:nFac,1:nFac] obsstat <- c(FacModelFit$covMatrix[lower.tri(FacModelFit$covMatrix, diag = T)] , FacModelFit$meanMatrix) wei <- c(FacModelFit$covWeights[lower.tri(FacModelFit$covWeights, diag = T)] , FacModelFit$meanWeights) A <- mxMatrix(type = "Full", values = a1, nrow = (nVars+nCovariates+nFac), ncol = (nVars+nCovariates+nFac), free = aFree1, labels = alab1, name = "A", dimnames = list (predims, predims)) load <- mxMatrix(type = "Full", nrow = nVars, ncol = nFac, values = loadVals, free = F, name = "load") S <- mxMatrix(type = "Full", values = sVal1, nrow = (ntv-nGen), ncol = (ntv-nGen), free = sFree1 , labels = sLab1 , name = "S", dimnames = list (predims, predims)) II <- mxMatrix(type = "Iden", nrow = (nVars+nCovariates+nFac), ncol = (nVars+nCovariates+nFac), name = "II") FF <- mxMatrix(type = "Full", values = cbind(diag(ntv-nGen-nFac), matrix(0, ntv-nGen-nFac, nFac)), free = F, name = "FF") expectedCov <- mxAlgebra( FF %&% (solve(II - A)%&% (S) ), "expectedCov") facMean <- mxMatrix("Full", values = 0, nrow = 1, ncol = nFac, free = T, labels = paste(FacNames, "mu", sep = "_"), name = "facMean") ItemMean <- mxAlgebra(t(load %*% t(facMean)), name = "ItemMean") CovMean <- mxMatrix(type = "Full", values = 0, nrow = 1, ncol = nCovariates, free = T, labels = paste(covariates, "mu", sep = "_"), name = "CovMean") expectedMean <- mxAlgebra(cbind(ItemMean , CovMean), name = "Mean") vec <- mxAlgebra(expression= rbind(cbind(vech(expectedCov)), cbind(t(Mean))), name="vec") # Expected Parameter Vector obsvec <- mxMatrix("Full", length(obsstat), 1, values= obsstat, name="obsvec") # Observed Parameter Vector weights <- mxMatrix("Full", length(wei), 1, values=wei, name="weights") # Weights for DWLS dwls <- mxAlgebra(sum( (vec-obsvec) * weights *(vec-obsvec)), name="dwls") # Algebra for the Objective Function DWLSobj <- mxFitFunctionAlgebra(algebra = "dwls") # Objective Function for DWLS FactorModel <- mxModel("DWLS", A, load, S, II, FF, facMean, expectedCov, ItemMean, CovMean, expectedMean, vec, obsvec, weights, dwls, DWLSobj) FactorPre <- mxRun(FactorModel, unsafe = T, silent = T) # summary(FactorPre) pre <- matrix(c(rep(NA, nFac*4 ), length(FactorPre@output$estimate), FactorPre@output$status$code, NA, NA), 1, (nFac*4 + 4)) colnames(pre) <- c(paste("SNP_", FacNames, sep=""), paste("SE_", FacNames, sep=""), paste("t_", FacNames, sep=""),paste("p_", FacNames, sep=""), "df", "status", "SnpMu", "SnpVar") # pre <- matrix(c(NA, NA , NA,NA, NA , NA, FactorPre@output$minimum, length(FactorPre@output$estimate), FactorPre@output$status$code), 1, 10) # colnames(pre) <- c("snp", "SNPvar", "SNPmean", "snpSE", "SNPvarSE", "SNPmeanSE", "minimum", "df", "status") write.table(pre,output, append = F, row.names = "Null") for(rs in 1:runs){ print(rs) stBin <- st[rs] +1 fiBin <- fi[rs] +1 system(paste("awk '{print $0}'", snpCov, "| head -1 > covTMP.txt ")) system(paste("sed -n '", paste(stBin, ",", fiBin, sep=""), "p'", snpCov, ">> covTMP.txt")) system(paste("awk '{print $0}'", snpWei, "| head -1 > weiTMP.txt ")) system(paste("sed -n '", paste(stBin, ",", fiBin, sep=""), "p'", snpWei, ">> weiTMP.txt")) WEIS <- read.table("weiTMP.txt") COVS <- read.table("covTMP.txt") COVS <- COVS[rowSums(is.na(WEIS))<3,] WEIS <- WEIS[rowSums(is.na(WEIS))<3,] WEIS[is.na(WEIS)] <- .1 snp2facDWLS <- matrix(NA, dim(WEIS)[1], (nFac*4 + 2)) estimates <- matrix(NA, dim(WEIS)[1], length(FactorPre@output$estimate)+nFac+2) for(fit in 1:dim(WEIS)[1]){ a <- matrix(0, ntv, ntv, dimnames = list(dims, dims)) a[1:nVars,(nVars+nCovariates+nGen+1):((nVars+nCovariates+nGen+nFac))] <- loadVals a[FacNames, "SNP"]<- .1 a[VarNames,covariates]<- .1 alab <- matrix("NULL", ntv, ntv, dimnames = list(dims, dims)) alab[1:nVars,"int"] <- paste("I" , VarNames, sep = "_") alab[1:nVars,"lin"] <- paste("L" , VarNames, sep = "_") if (quadratic) alab[1:nVars,"quad"] <- paste("Q" , VarNames, sep = "_") alab[VarNames,covariates] <- paste(rep(covariates, nVars), VarNames, sep = "_") alab[FacNames,"SNP"] <- paste("SNP" , FacNames, sep = "_") aFree <- matrix(F, ntv, ntv, dimnames = list(dims, dims)) aFree[VarNames,covariates] <- T aFree[FacNames, "SNP"] <- T sVal1 <- diag(ntv-nGen) sVal1[(nVars+nCovariates+1):(nVars+nCovariates+nFac), (nVars+nCovariates+1):(nVars+nCovariates+nFac)] <- sVal1[(nVars+nCovariates+1):(nVars+nCovariates+nFac), (nVars+nCovariates+1):(nVars+nCovariates+nFac)] + .2 sFree1 <- (diag(c(diag(CovWei)!=0, diag(diag(nFac)==1))))==1 sFree1[(nVars+nCovariates+1):(nVars+nCovariates+nFac),(nVars+nCovariates+1):(nVars+nCovariates+nFac)] <- T sLab1 <- matrix("NULL", ntv-nGen, ntv-nGen, dimnames = list(predims, predims)) sLab1[sVal1==1][1:(nVars+nCovariates)]<- paste("v_", c(rep("res", nVars), covariates),sep = "") sLab1[(nVars+nCovariates+1):(nVars+nCovariates+nFac), (nVars+nCovariates+1):(nVars+nCovariates+nFac)]<- matrix(c("v_int", "i_l", "i_q", "i_l", "v_lin", "l_q", "i_q", "l_q", "v_quad"),3,3)[1:nFac,1:nFac] sFree <- (diag(c(diag(CovWei)!=0, diag(diag(nFac+nGen)==1))))==1 sFree[(nVars+nCovariates+nGen+1):(nVars+nCovariates+nGen+nFac),(nVars+nCovariates+nGen+1):(nVars+nCovariates+nGen+nFac)] <- T sPre <- diag(ntv)==1 sPre[(nVars+nCovariates+nGen+1):(ntv),(nVars+nCovariates+nGen+1):(ntv)] <- T sLab <- matrix("NULL", ntv, ntv, dimnames = list(dims, dims)) sLab[sPre==T][1:(nVars+nCovariates+nGen)]<- paste("v_", c(rep("res", nVars), c(covariates, "SNP")),sep = "") sLab[(nVars+nCovariates+nGen+1):(ntv), (nVars+nCovariates+nGen+1):(ntv)]<- matrix(c("v_int", "i_l", "i_q", "i_l", "v_lin", "l_q", "i_q", "l_q", "v_quad"),3,3)[1:nFac,1:nFac] sVal <- matrix(0, ntv, ntv, dimnames = list(dims, dims)) sVal[sFree==T] <- .2 sVal <- sVal + diag(ntv) *.8 genSnpCov <- matrix(COVS[fit, c(VarNames,covariates)], 1, (nVars + nCovariates), dimnames = list ( rownames(COVS[fit,]),colnames(ObsCov))) genCov <- matrix(COVS[fit, "Vars"], 1, 1, dimnames = list(rownames(genSnpCov),rownames(genSnpCov))) Cov <- cbind(rbind(ObsCov, genSnpCov), t(cbind(genSnpCov,genCov))) genSnpWei <- matrix(WEIS[fit, c(VarNames,covariates)], 1, (nVars + nCovariates), dimnames = list ( rownames(COVS[fit,]),colnames(ObsCov))) genWei <- matrix(WEIS[fit, "Vars"], 1, 1, dimnames = list(rownames(genSnpWei),rownames(genSnpWei))) Wei <- cbind(rbind(CovWei, genSnpWei), t(cbind(genSnpWei,genWei))) Means <- c(ObsMean, COVS[fit, c("Mus")]) MeansWeight <- c(MeanWei, WEIS[fit, c("Mus")]) obsstat <- c(unlist(Cov[lower.tri(Cov, diag = T)]), Means) wei <- c(unlist(Wei[lower.tri(Wei, diag = T)]) , MeansWeight) A <- mxMatrix(type = "Full", values = a, nrow = (ntv), ncol = (ntv), free = aFree, labels = alab, name = "A", dimnames = list (dims, dims)) load <- mxMatrix(type = "Full", nrow = nVars, ncol = nFac, values =loadVals , free = F, name = "load") S <- mxMatrix(type = "Full", values = sVal, nrow = (ntv), ncol = (ntv), free = sFree , labels = sLab , name = "S", dimnames = list (dims, dims)) II <- mxMatrix(type = "Iden", nrow = (ntv), ncol = (ntv), name = "II") FF <- mxMatrix(type = "Full", values = cbind(diag(ntv-nFac), matrix(0, ntv-nFac, nFac)), free = F, name = "FF") expectedCov <- mxAlgebra( FF %&% (solve(II - A)%&% (S) ), "expectedCov") # facMean <- mxMatrix("Full", values = 0, nrow = 1, ncol = nFac, free = T, labels = paste(FacNames, "_mu", sep = ""), name = "facMean") # expectedMean <- mxAlgebra(t(load %*% t(facMean)), name = "Mean") facMean <- mxMatrix("Full", values = 0, nrow = 1, ncol = nFac, free = T, labels = paste(FacNames, "mu", sep = "_"), name = "facMean") ItemMean <- mxAlgebra(t(load %*% t(facMean)), name = "ItemMean") CovMean <- mxMatrix(type = "Full", values = 0, nrow = 1, ncol = nCovariates+1, free = T, labels = c(paste(covariates, "mu", sep = "_"), "snp_mu"), name = "CovMean") expectedMean <- mxAlgebra(cbind(ItemMean , CovMean), name = "Mean") vec <- mxAlgebra(expression= rbind(cbind(vech(expectedCov)), cbind(t(Mean))), name="vec") # Expected Parameter Vector obsvec <- mxMatrix("Full", length(obsstat), 1, values= obsstat, name="obsvec") # Observed Parameter Vector weights <- mxMatrix("Full", length(wei), 1, values=wei, name="weights") # Weights for DWLS dwls <- mxAlgebra(sum( (vec-obsvec) * weights *(vec-obsvec)), name="dwls") # Algebra for the Objective Function DWLSobj <- mxFitFunctionAlgebra(algebra = "dwls") # Objective Function for DWLS growModelDWLS <- mxModel("grow", A, load, S, II, FF, facMean, ItemMean, CovMean, expectedMean, expectedCov, expectedMean, vec, obsvec, weights, dwls, DWLSobj) growFitDWLS <- mxRun(growModelDWLS, unsafe = T, silent = T) #summary(growFitDWLS) params <- as.vector(growFitDWLS@output$estimate[paste("SNP", FacNames, sep="_")]) SES <- growFitDWLS@output$standardErrors[paste("SNP", FacNames, sep="_"),1] #snp2facDWLS[fit,] <- c(params, SES, ordFitDWLS@output$minimum, ordFitDWLSnull@output$minimum,length(ordFitDWLS@output$estimate), ordFitDWLS@output$status$code) snp2facDWLS[fit,] <- c(params, SES, params/SES, 2*pnorm(-abs(params/SES)), length(growFitDWLS@output$estimate), growFitDWLS@output$status$code) estimates[fit,] <- growFitDWLS@output$estimate colnames(estimates) <- names(growFitDWLS@output$estimate) } # close fit rownames(snp2facDWLS) <- rownames(COVS) snp2facDWLS <- cbind(snp2facDWLS, COVS$Mus, COVS$Vars) write.table(snp2facDWLS,output, append = T, col.names = F, row.names = T) rownames(estimates) <- rownames(COVS) if(rs==1) {write.table(estimates,"growEstimates.txt", append = F, col.names = T, row.names = T)} else{ write.table(estimates,"growEstimates.txt", append = T, col.names = F, row.names = T)} } # close rs } # This function conducts the GWAS using FILM # Note that it is very slow and should only be used with a small number of continuous variables gwasFIML <- function(FacVars, FacModelData, SNPdata = NULL, output = "FIMLout.txt", runs = NA, inc = 1000){ facData <- subset(read.table(FacModelData), select = FacVars) nVariables <- dim(facData)[2] nSNP <- as.numeric(system(paste("sed -n '2'p", SNPdata, "| wc -w"), intern = T)) # Number of SNPs manVars <- colnames(facData) st <- seq(1, nSNP, by=inc) fi <- seq(inc, nSNP, by=inc) if( fi[length(fi)] != nSNP) fi[length(fi)+1] <- nSNP if(is.na(runs)) runs<- length(fi) FacData <- mxData(observed=facData[,1:nVariables], type="raw") res <- mxPath(from=manVars, arrows=2, free=rep(T, nVariables), values=rep(1, nVariables), labels=paste("e", 1:nVariables, sep = "") ) facCov <- mxPath(from="F1", arrows=2, connect="unique.pairs", free=F, values=1, labels=c("varF1") ) F1load <- mxPath(from="F1", to=manVars, arrows=1, free=T, values=.7, labels=paste("l", 1:nVariables, sep = "") ) means <- mxPath(from="one",to= c(manVars, "F1"), arrows=1, free=c(rep(T, nVariables), F), values=c(rep(F, nVariables),0), labels=c(paste("mean", 1:nVariables, sep = ""),NA) ) FactorModel <- mxModel("Factor", type="RAM", manifestVars = manVars, latentVars=c("F1"), FacData, res, facCov, F1load, means) FactorPre <- mxRun(FactorModel, silent = T) pre <- matrix(c(NA, FactorPre@output$estimate, NA, FactorPre@output$minimum, 0, length(FactorPre@output$estimate), FactorPre@output$status$code), 1, 21); colnames(pre) <- c("beta",names(FactorPre@output$estimate), "Nullmin", "min", "MAF", "df", "NPSOL") write.table(pre,output, append = F, row.names = "Null") lds <- omxGetParameters(FactorPre)[1:nVariables] rss <- omxGetParameters(FactorPre)[(nVariables+1):(2*nVariables)] mus <- omxGetParameters(FactorPre)[(2*nVariables+1):(3*nVariables)] for(rs in 1:runs){ print(rs) stBin <- st[rs] fiBin <- fi[rs] system(paste("cut -d ' ' -f", paste(stBin, "-", fiBin, sep=""), SNPdata, "> tmp.txt") ) snp <- read.table("tmp.txt", header = T) for(i in 1:dim(snp)[2]) { snp[,i] <- as.numeric(snp[,i])-1 } snp2fac <- apply(snp , 2, function(x) { Gdata <- cbind(facData,x) FacData <- mxData(observed=Gdata, type="raw") xbar <- mean(Gdata[,nVariables+1]) sigma2 <- sd(Gdata[,nVariables+1])^2 res <- mxPath(from=c(manVars, "x"), arrows=2, free=c(rep(T, nVariables),F), values=c(rss,sigma2), labels=c(paste("e", 1:nVariables, sep = ""),"res1") ) facCov <- mxPath(from=c("F1"), arrows=2, connect="unique.pairs", free=c(FALSE), values=1, labels=c("varF1") ) F1load <- mxPath(from="F1", to=manVars, arrows=1, free=T, values=lds, labels=paste("l", 1:nVariables, sep = "") ) means <- mxPath(from="one",to= c(manVars, "x", "F1"), arrows=1, free=c(rep(T, nVariables), F, F), values=c(mus,xbar,0), labels=c(paste("mean", 1:nVariables, sep = ""),"def",NA) ) def <- mxPath(from= "x" , to="F1", arrows=1, free=T, values=.00, labels=c("snp") ) FactorModel <- mxModel("Factor", type="RAM", manifestVars = c(manVars, "x"), latentVars=c("F1"), FacData, res, facCov, F1load, means, def) FactorModel <- mxOption(FactorModel, "Calculate Hessian", "No") # may optimize for speed FactorModel <- mxOption(FactorModel, "Standard Errors" , "No") # may optimize for speed NullModel <- omxSetParameters(FactorModel, names(omxGetParameters(FactorModel)), free = F) NullFit <- mxRun(NullModel, silent = T) FactorFit <- mxRun(FactorModel, unsafe = T, silent = T) c(FactorFit@output$estimate, NullFit@output$minimum, FactorFit@output$minimum, xbar/2, length(FactorFit@output$estimate), FactorFit@output$status$code) } ) write.table(t(snp2fac),output, append = T, col.names = F) } } ##################################### gwasOrdFIML <- function(FacVars, covariates = NULL, FacModelData, SNPdata, output = "ordFIMLout.txt", runs = NA, inc = 1000){ facData <- subset(read.table(FacModelData, header = T), select = c(FacVars, covariates)) ordtest <- as.data.frame(t(apply(facData, 2, function(x) length(unique(x))))) nVariables <- length(FacVars) nCovariates <- length(covariates) nSNP <- as.numeric(system(paste("sed -n '2'p", SNPdata, "| wc -w"), intern = T)) # Number of SNPs manVars <- colnames(facData) nThr <- 3 st <- seq(1, nSNP, by=inc) fi <- seq(inc, nSNP, by=inc) if( fi[length(fi)] != nSNP) fi[length(fi)+1] <- nSNP if(is.na(runs)) runs<- length(fi) nItems <- length(c(FacVars)) nFac <- 1 a1 <- matrix(0, nItems+nCovariates+nFac, nItems+nCovariates+nFac) dimnames(a1) <- list(c(FacVars, covariates, "Fac"),c(FacVars, covariates, "Fac")) a1[FacVars, "Fac"] <- .95 a1[FacVars, covariates] <- .1 alab1 <- matrix("Null", nItems+nCovariates+nFac, nItems+nCovariates+nFac) dimnames(alab1) <- list(c(FacVars, covariates, "Fac"),c(FacVars, covariates, "Fac")) alab1[FacVars, "Fac"]<- paste("l" , 1:length(FacVars), sep = "") alab1[FacVars, covariates] <- paste(FacVars, rep(covariates, each = nVariables), sep = "_") FacData <- mxFactor(as.data.frame(facData),levels=c(1,2,3,4)) FacData <- mxData(observed=FacData, type="raw") selVars <- c(FacVars, covariates) A <- mxMatrix(type = "Full", values = a1, nrow = (nItems+nCovariates+1), ncol = (nItems+nCovariates+1), free = a1!=0, labels = alab1, name = "A") S <- mxMatrix(type = "Diag", values = 1, nrow = (nItems+nCovariates+1), ncol = (nItems+nCovariates+1), free = c(rep(T, nItems+nCovariates), F) , labels = c(paste("var", FacVars, sep = ""), paste("var", covariates, sep = ""), "Fvar") , name = "S") # not very general II <- mxMatrix(type = "Iden", nrow = (nItems+nCovariates+1), ncol = (nItems+nCovariates+1), name = "II") FF <- mxMatrix(type = "Full", values = cbind(diag(nItems+nCovariates), matrix(0, nItems+nCovariates, nFac)), free = F, name = "FF") expectedCov <- mxAlgebra( FF %&% (solve(II - A)%&% (S) ), "expectedCov") expectedMean <- mxMatrix("Full", values = 0, nrow = 1, ncol = (nItems+nCovariates), free = T, labels = c(paste("mu", FacVars, sep = "_"),paste("mu", covariates, sep = "_")), name = "expectedMean") thresh <- mxMatrix("Full", name="thresh", nrow=nThr, ncol=nVariables+nCovariates, values=c(0, 1, 2), free = c(F, F, T)) obj <- mxExpectationNormal(covariance="expectedCov", means="expectedMean", dimnames = selVars, thresholds="thresh") fun <- mxFitFunctionML() FactorModel <- mxModel("Pre", A, S, II, FF, expectedCov, expectedMean, thresh, obj, fun, FacData) FactorModel <- mxOption(FactorModel, "Calculate Hessian", "No") # may optimize for speed FactorModel <- mxOption(FactorModel, "Standard Errors" , "No") # may optimize for speed FactorPre <- mxRun(FactorModel, silent = T) summary(FactorPre) lds <- FactorPre@output$estimate[paste("l" , 1:length(FacVars), sep = "")] vars <- FactorPre@output$estimate[c(paste("var", FacVars, sep = ""), paste("var", covariates, sep = ""))] thr <- FactorPre$thresh$values pre <- matrix(c(NA, lds, NA, NA, FactorPre@output$minimum, 0, length(FactorPre@output$estimate), FactorPre@output$status$code), 1, 7+nItems) colnames(pre) <- c("beta", paste("l" , 1:length(FacVars), sep = ""), "snpVar", "snpMu", "NullMin", "SnpMin", "df", "code") write.table(pre,output, append = F, row.names = "Null") for(rs in 1:runs){ print(rs) stBin <- st[rs] fiBin <- fi[rs] system(paste("cut -d ' ' -f", paste(stBin, "-", fiBin, sep=""), SNPdata, "> tmp.txt") ) snp <- read.table("tmp.txt", header = T) tmpNames<- colnames(snp) snp2fac <- matrix(NA , inc, 7+nItems, dimnames = list(c(tmpNames),c("beta", paste("l" , 1:length(FacVars), sep = ""), "snpVar", "snpMu", "NullMin", "SnpMin", "df", "code"))) for(fit in 1:inc){ Gdata <- as.data.frame(cbind(facData,snp[,fit]) ) colnames(Gdata) <- VARS <- c(FacVars, covariates, tmpNames[fit]) Gdata[,1:(nItems+nCovariates)] <- mxFactor(as.data.frame(Gdata[,1:(nItems+nCovariates)]),levels=c(1,2,3,4)) FacData <- mxData(observed=Gdata, type="raw") a <- matrix(0, nItems+nCovariates+nFac+1, nItems+nCovariates+nFac+1) dimnames(a) <- list(c(FacVars, covariates, "snp","Fac"),c(FacVars, covariates, "snp","Fac")) a[FacVars, "Fac"] <- lds a[FacVars, covariates] <- .01 a["Fac","snp"] <- .01 alab <- matrix("Null", nItems+nCovariates+nFac+1, nItems+nCovariates+nFac+1) dimnames(alab) <- list(c(FacVars, covariates, "snp","Fac"),c(FacVars, covariates, "snp","Fac")) alab[FacVars, "Fac"]<- paste("l" , 1:nItems, sep = "") alab[FacVars, covariates] <- paste(FacVars, rep(covariates, each = nVariables), sep = "_") alab["Fac","snp"] <- "snp" aFree <- a!=0 SnpVars <- var(Gdata[,(nVariables+nCovariates+1)]) SnpMeans <- mean(Gdata[,(nVariables+nCovariates+1)]) A <- mxMatrix(type = "Full", values = a, nrow = nItems+nCovariates+nFac+1, ncol = nItems+nCovariates+nFac+1, free = aFree, labels = alab, name = "A") S <- mxMatrix(type = "Diag", values = c(vars, SnpVars, 1), nrow = nItems+nCovariates+nFac+1, ncol = nItems+nCovariates+nFac+1, free = c(rep(T, nItems+nCovariates), T, F) , labels = c(paste("var", c(FacVars, covariates), sep= ""), "SNPvar", "FacVar") , name = "S") II <- mxMatrix(type = "Iden", nrow = nItems+nCovariates+nFac+1, ncol = nItems+nCovariates+nFac+1, name = "II") FF <- mxMatrix(type = "Full", values = cbind(diag(nItems+nCovariates+1), matrix(0, nItems+nCovariates+1, nFac)), free = F, name = "FF") expectedCov <- mxAlgebra( FF%&% (solve(II - A)%&% (S )), "expectedCov") expectedMean <- mxMatrix("Full", values = 0, nrow = 1, ncol = nItems+nCovariates+1, free = T, labels = c(paste("mean", c(FacVars, covariates), sep = ""), "SNPmean"), name = "expectedMean") thresh <- mxMatrix("Full", name="thresh", nrow=nThr, ncol=nVariables+nCovariates, values=thr, free = c(F, F, T), dimnames = list(c("th1", "th2", "th3"), selVars)) obj <- mxExpectationNormal(covariance="expectedCov", means="expectedMean", dimnames = VARS, thresholds="thresh", threshnames = selVars) fun <- mxFitFunctionML() FactorModel <- mxModel("SNP", A, S, II, FF, expectedCov, expectedMean, thresh, obj, fun, FacData) FactorModel <- mxOption(FactorModel, "Calculate Hessian", "No") # may optimize for speed FactorModel <- mxOption(FactorModel, "Standard Errors" , "No") # may optimize for speed #NullModel <- omxSetParameters(FactorModel, names(omxGetParameters(FactorModel)), free = F) FactorReg <- mxRun(FactorModel, silent = T) Bsnp <- FactorReg@output$estimate["snp"] lds <- FactorReg@output$estimate[paste("l" , 1:length(FacVars), sep = "")] NullModel <- omxSetParameters(FactorModel, "snp", free = F, values = 0) NullFit <- mxRun(NullModel, silent = T) snp2fac[fit,] <- c(Bsnp, lds, SnpVars, SnpMeans, NullFit$output$minimum, FactorReg$output$minimum, length(FactorReg$output$estimate), FactorReg$output$status$code) } write.table(snp2fac,output, append = T, col.names = F) } } manhattan <- function(outputFile, title = "Manhattan Plot"){ out <- read.table(outputFile, header = T) out$t <- out$snp/out$snpSE out$p <- 2*pnorm(-abs(out$snp/out$snpSE)) plot(-log10(out$p), ylab = "-Log10 (P value)", ylim = c(0, 9), main = title) abline(h = 5, col = "blue") abline(h = 8, col = "red") }