午夜视频在线网站,日韩视频精品在线,中文字幕精品一区二区三区在线,在线播放精品,1024你懂我懂的旧版人,欧美日韩一级黄色片,一区二区三区在线观看视频

分享

Tutorial for the WGCNA package for R

 頭頭了不起 2021-04-13
2字數 314閱讀 1,306

I. Network analysis of liver expression data from female mice: finding modules related to body weight

參考1:Tutorials for the WGCNA package
參考2:一文學會WGCNA分析
參考3:STEP6:WGCNA相關(guān)性分析
第一步:Data input and cleaning

1.1 數據的導入

# Section 1.1: load the expression data and reshape it for WGCNA.
# Start from a clean workspace and switch to the tutorial's data directory.
rm(list = ls())
setwd('E:/gsj/RWD/WGCNA/')
library(WGCNA)
# The original tutorial notes that this setting must not be omitted.
options(stringsAsFactors = FALSE)
# Read in the expression file.
femData <- read.csv("LiverFemale3600.csv")
# Inspect the file layout; if it differs, the steps below need adjusting.
# (Fixed: the original called the non-existent function `name()`.)
names(femData)
head(femData)

# WGCNA expects an expression matrix with samples as rows and genes as columns:
# drop the first 8 columns, transpose the rest, then label columns with the
# probe identifiers and rows with the remaining column names of femData.
datExpr0 <- as.data.frame(t(femData[, -c(1:8)]))
names(datExpr0) <- femData$substanceBXH
rownames(datExpr0) <- names(femData)[-c(1:8)]
dim(datExpr0)

1.2 檢查是否有離群值

# Section 1.2: check the data for missing values; gsg$allOK is TRUE when
# nothing needs to be removed.
gsg <- goodSamplesGenes(datExpr0, verbose = 3)
gsg$allOK

# If allOK is not TRUE, drop the flagged genes and samples.
if (!gsg$allOK) {
  # Optionally, print the gene and sample names that were removed.
  # (Fixed: the original indexed inside names() and passed `collapse` to it,
  # leaving the parentheses unbalanced.)
  if (sum(!gsg$goodGenes) > 0) {
    printFlush(paste("Removing genes:",
                     paste(names(datExpr0)[!gsg$goodGenes], collapse = ", ")))
  }
  if (sum(!gsg$goodSamples) > 0) {
    printFlush(paste("Removing samples:",
                     paste(rownames(datExpr0)[!gsg$goodSamples], collapse = ", ")))
  }
  # Remove the offending genes and samples from the data.
  datExpr0 <- datExpr0[gsg$goodSamples, gsg$goodGenes]
}

# Step two: hierarchically cluster the samples to see whether any are outliers.
sampleTree <- hclust(dist(datExpr0), method = "average")
sizeGrWindow(12, 9)
par(cex = 0.6)
par(mar = c(0, 4, 2, 0))
plot(
  sampleTree,
  main = "Sample clustering to detect outliers",
  sub = "",
  xlab = "",
  cex.lab = 1.5,
  cex.axis = 1.5,
  cex.main = 2
)
# According to the tutorial's plot, sample F2_221 is an outlier; either delete
# it by hand or set a height threshold and cut it off.
abline(h = 15, col = "red")
# Cut the tree at the chosen height.
clust <- cutreeStatic(sampleTree, cutHeight = 15, minSize = 10)
table(clust)
# Cluster 1 holds the samples we want to keep.
keepSamples <- (clust == 1)
datExpr <- datExpr0[keepSamples, ]
nGenes <- ncol(datExpr)
nSamples <- nrow(datExpr)
# After these steps, datExpr is ready for the downstream analysis.
缺失值檢查

離群樣本檢查

cluster選擇

1.3 導入臨床信息

# Section 1.3: load the clinical trait data and align it with the samples.
traitData <- read.csv("ClinicalTraits.csv")
dim(traitData)
names(traitData)
# Remove columns that are not needed.
allTraits <- traitData[, -c(31, 16)]
allTraits <- allTraits[, c(2, 11:36)]
dim(allTraits)
names(allTraits)
# Every sample has matching clinical information (weight, length, ...).
# (Fixed: R is case-sensitive; the data viewer is `View()`, not `view()`.)
View(allTraits)

# Match the clinical records to the sample names of the expression data.
femaleSamples <- rownames(datExpr)
traitRows <- match(femaleSamples, allTraits$Mice)
# Drop the first column (used below as the row names) from the trait table.
datTraits <- allTraits[traitRows, -1]
rownames(datTraits) <- allTraits[traitRows, 1]
View(datTraits)

# Before network construction and module detection, look at how the samples
# relate to the clinical traits.
sampleTree2 <- hclust(dist(datExpr), method = "average")
# Map trait values to colors: white = low, red = high, grey = missing.
traitColors <- numbers2colors(datTraits, signed = FALSE)
# Plot the sample dendrogram with the trait colors underneath.
plotDendroAndColors(sampleTree2, traitColors,
                    groupLabels = names(datTraits),
                    main = "Sample dendrogram and trait heatmap")

# If everything looks right, save the objects for the next steps.
save(datExpr, datTraits, file = "FemaleLiver-01-dataInput.RData")

第二步:Automatic network construction and module detection

2.0 構建R環境

######################################
# If you have closed R, run the following steps first
#rm(list = ls())
#setwd('E:/gsj/RWD/WGCNA/')
#library(WGCNA)
#options(stringsAsFactors = FALSE)# this step must not be skipped
#enableWGCNAThreads()# lets WGCNA use multiple threads; per the original note, it can be skipped when working on a local machine
#lnames = load(file = "FemaleLiver-01-dataInput.RData")
# The above loads datExpr and datTraits produced in step one

2.1 確定合適的閾值

# Gene networks can be built in several ways; this walkthrough uses the
# automatic one-step construction.
# Candidate soft-thresholding powers: 1 to 10, then 12 to 20 in steps of 2.
powers <- c(1:10, seq(from = 12, to = 20, by = 2))
# Let pickSoftThreshold evaluate each candidate power.
sft <- pickSoftThreshold(datExpr, powerVector = powers, verbose = 5)
# Show the results in two side-by-side panels.
sizeGrWindow(9, 5)
par(mfrow = c(1, 2))
cex1 <- 0.9

# Panel 1: signed scale-free topology fit against the power.
plot(
  sft$fitIndices[, 1],
  -sign(sft$fitIndices[, 3]) * sft$fitIndices[, 2],
  xlab = "Soft Threshold (power)",
  ylab = "Scale Free Topology Model Fit,signed R^2",
  type = "n",
  main = "Scale independence"
)
text(
  sft$fitIndices[, 1],
  -sign(sft$fitIndices[, 3]) * sft$fitIndices[, 2],
  labels = powers,
  cex = cex1,
  col = "red"
)
abline(h = 0.90, col = "green")

# Panel 2: mean connectivity against the power.
plot(
  sft$fitIndices[, 1],
  sft$fitIndices[, 5],
  xlab = "Soft Threshold (power)",
  ylab = "Mean Connectivity",
  type = "n",
  main = "Mean connectivity"
)
text(
  sft$fitIndices[, 1],
  sft$fitIndices[, 5],
  labels = powers,
  cex = cex1,
  col = "red"
)
# Based on these two plots the tutorial settles on a power of 6.
閾值選擇

2.2 一步構建網絡圖和模塊選擇

# One-step network construction and module detection.
net <- blockwiseModules(
  datExpr,
  power = 6,
  TOMType = "unsigned",
  minModuleSize = 30,
  reassignThreshold = 0,
  mergeCutHeight = 0.25,
  numericLabels = TRUE,
  pamRespectsDendro = FALSE,
  saveTOMs = TRUE,
  saveTOMFileBase = "femaleMouseTOM",
  verbose = 3
)
# mergeCutHeight is the threshold used when merging modules.
# The settings above are baseline values; other data types may need different ones.
# If the machine cannot cope, see the tutorial's construction method for large data sets.

# Show how many modules there are and how many genes each one holds.
# In the tutorial's output there are 18 modules, numbered 1 to 18 in order of
# decreasing size; module 0 collects the unassigned genes.
table(net$colors)
# The hierarchical clustering result lives in net$dendrograms[[1]].
# The code below draws the gene dendrogram together with a color band showing
# which module each gene belongs to.
# Set the window size.
sizeGrWindow(12, 9)
mergedColors <- labels2colors(net$colors)
# Draw the plot.
plotDendroAndColors(
  net$dendrograms[[1]],
  mergedColors[net$blockGenes[[1]]],
  "Module colors",
  dendroLabels = FALSE,
  hang = 0.03,
  addGuide = TRUE,
  guideHang = 0.05
)

# To change some of the parameters afterwards, recutBlockwiseTrees can be used.

# Save the results.
moduleLabels <- net$colors
moduleColors <- labels2colors(net$colors)
MEs <- net$MEs
geneTree <- net$dendrograms[[1]]
save(
  MEs, moduleLabels, moduleColors, geneTree,
  file = "FemaleLiver-02-networkConstruction-auto.RData"
)

第三步:relating modules to external information and identifying important genes

3.0 構建R環境
#rm(list = ls())
#setwd('E:/gsj/RWD/WGCNA/')
#library(WGCNA)
#options(stringsAsFactors = FALSE)# this step must not be skipped
#enableWGCNAThreads()# lets WGCNA use multiple threads; per the original note, it can be skipped when working on a local machine
#lnames = load(file = "FemaleLiver-01-dataInput.RData")
# The above loads datExpr and datTraits produced in step one
#lnames = load(file = "FemaleLiver-02-networkConstruction-auto.RData");
#lnames
# The above loads the objects saved in step two
3.1 計算模塊和性狀之間的相關性
# Dimensions of the expression data.
nGenes <- ncol(datExpr)
nSamples <- nrow(datExpr)
# Recompute module eigengenes using the color labels, then order them.
MEs0 <- moduleEigengenes(datExpr, moduleColors)$eigengenes
# ME matrix for the colored modules (samples x modules).
MEs <- orderMEs(MEs0)
# Correlate eigengenes with traits and derive p-values.
moduleTraitCor <- cor(MEs, datTraits, use = "p")
moduleTraitPvalue <- corPvalueStudent(moduleTraitCor, nSamples)

# Build the cell labels: correlation with its p-value in parentheses below.
sizeGrWindow(10, 6)
textMatrix <- paste0(
  signif(moduleTraitCor, 2), "\n(",
  signif(moduleTraitPvalue, 1), ")"
)
dim(textMatrix) <- dim(moduleTraitCor)
par(mar = c(6, 8.5, 3, 3))
# Draw the heatmap with the correlation values printed in each cell.
labeledHeatmap(
  Matrix = moduleTraitCor,
  xLabels = names(datTraits),
  yLabels = names(MEs),
  ySymbols = names(MEs),
  colorLabels = FALSE,
  colors = greenWhiteRed(50),
  textMatrix = textMatrix,
  setStdMargins = FALSE,
  cex.text = 0.5,
  zlim = c(-1, 1),
  main = "Module-trait relationships"
)
# The heatmap shows which modules relate to which traits; from here on the
# tutorial concentrates on the weight trait.

3.2 確定相關模塊中的顯著相關基因
# Module-trait correlations identify the most relevant modules, but a module
# still contains many genes, so the next step is to find the most important ones.
# First, the correlation matrix between genes and modules.
# Pull the weight column out into its own data frame.
weight <- as.data.frame(datTraits$weight_g)
names(weight) <- "weight"
# Module colors: eigengene names without their first two characters.
modNames <- substring(names(MEs), 3)
# Gene-by-module correlation matrix:
#   MEs holds the value of each module in each sample,
#   datExpr holds the expression of each gene in each sample.
geneModuleMembership <- as.data.frame(cor(datExpr, MEs, use = "p"))

MMPvalue <- as.data.frame(
  corPvalueStudent(as.matrix(geneModuleMembership), nSamples)
)
names(geneModuleMembership) <- paste0("MM", modNames)
names(MMPvalue) <- paste0("p.MM", modNames)
# Next, the correlation between genes and the trait.
# The original note says this only applies to continuous traits.
geneTraitSignificance <- as.data.frame(cor(datExpr, weight, use = "p"))
GSPvalue <- as.data.frame(
  corPvalueStudent(as.matrix(geneTraitSignificance), nSamples)
)
names(geneTraitSignificance) <- paste0("GS.", names(weight))
names(GSPvalue) <- paste0("p.GS.", names(weight))
3.3 篩選出和性狀以及模塊相關性都很高的基因
# According to the tutorial's heatmap, the brown module is the one most
# strongly related to weight.
module <- "brown"
column <- match(module, modNames)
moduleGenes <- moduleColors == module
sizeGrWindow(7, 7)
par(mfrow = c(1, 1))
verboseScatterplot(
  abs(geneModuleMembership[moduleGenes, column]),
  abs(geneTraitSignificance[moduleGenes, 1]),
  xlab = paste("Module Membership in", module, "module"),
  ylab = "Gene significance for body weight",
  main = paste("Module membership vs. gene significance\n"),
  cex.main = 1.2,
  cex.lab = 1.2,
  cex.axis = 1.2,
  col = module
)
# For genes in the brown module, this plot sets module membership against
# gene significance for body weight.
# Genes strongly related to the trait tend to sit in modules that are strongly
# related to the trait.
# At this point we have the module most related to the trait of interest and
# the genes associated with it.
3.4 將網絡分析結果輸出
# Section 3.4: write the network analysis results to a CSV file.
# List all probe IDs.
names(datExpr)
# List the probes that belong to the brown module.
names(datExpr)[moduleColors == "brown"]
# An annotation file can be used to translate the IDs.
annot <- read.csv(file = "GeneAnnotation.csv")
dim(annot)
names(annot)
probes <- names(datExpr)
probes2annot <- match(probes, annot$substanceBXH)
# Number of probes without an annotation entry.
sum(is.na(probes2annot))

# Build a data frame with probe ID, gene symbol, LocusLink ID, module color,
# gene significance for weight and its p-value.
geneInfo0 <- data.frame(substanceBXH = probes,
                        geneSymbol = annot$gene_symbol[probes2annot],
                        LocusLinkID = annot$LocusLinkID[probes2annot],
                        moduleColor = moduleColors,
                        geneTraitSignificance,
                        GSPvalue)

# Order modules by the absolute correlation of their eigengene with weight.
modOrder <- order(-abs(cor(MEs, weight, use = "p")))
# Append module membership and its p-value for every module, in that order.
# seq_along() avoids the 1:0 trap of `1:ncol(...)`, and assigning the columns
# by name avoids rebuilding the whole data frame on every iteration.
for (mod in seq_along(modOrder)) {
  modIndex <- modOrder[mod]
  geneInfo0[[paste0("MM.", modNames[modIndex])]] <-
    geneModuleMembership[, modIndex]
  geneInfo0[[paste0("p.MM.", modNames[modIndex])]] <-
    MMPvalue[, modIndex]
}
# Sort genes by module color, then by decreasing absolute gene significance.
geneOrder <- order(geneInfo0$moduleColor, -abs(geneInfo0$GS.weight))
geneInfo <- geneInfo0[geneOrder, ]
write.csv(geneInfo, file = "geneInfo.csv")

第四步:Interfacing network analysis with other data such as functional annotation and gene ontology

4.1 選取感興趣的基因進行功能注釋或者其他分析
# Load the probe annotation table.
annot <- read.csv(file = "GeneAnnotation.csv")
# Line up the data set's probes with the annotation file's probe IDs.
probes <- names(datExpr)
probes2annot <- match(probes, annot$substanceBXH)
# LocusLink IDs for every probe, in data-set order.
allLLIDs <- annot$LocusLinkID[probes2annot]
# Modules of interest.
intModules <- c("brown", "red", "salmon")
for (module in intModules) {
  # Probes belonging to this module.
  modGenes <- (moduleColors == module)
  # Their LocusLink ID codes.
  modLLIDs <- allLLIDs[modGenes]
  # One output file per module.
  fileName <- paste0("LocusLinkIDs-", module, ".txt")
  write.table(
    as.data.frame(modLLIDs),
    file = fileName,
    row.names = FALSE,
    col.names = FALSE
  )
}
# All probes in the analysis serve as background for the enrichment analysis.
fileName <- "LocusLinkIDs-all.txt"
write.table(
  as.data.frame(allLLIDs),
  file = fileName,
  row.names = FALSE,
  col.names = FALSE
)
# These steps produce the ID files; enrichment analysis can then be run on the
# modules of interest.

第五步:Network visualization using WGCNA functions

5.0 構建R環境
#rm(list = ls())
#setwd('E:/gsj/RWD/WGCNA/')
#library(WGCNA)
#options(stringsAsFactors = FALSE)# this step must not be skipped
#enableWGCNAThreads()# lets WGCNA use multiple threads; per the original note, it can be skipped when working on a local machine
#lnames = load(file = "FemaleLiver-01-dataInput.RData")
# The above loads datExpr and datTraits produced in step one
#lnames = load(file = "FemaleLiver-02-networkConstruction-auto.RData");
#lnames
# The above loads the objects saved in step two
5.1 可視化基因互作網絡
# Section 5.1: network visualization with TOM heatmaps.
# Heatmap over all genes.
# This step is very slow; the original author advises against running it.
dissTOM <- 1 - TOMsimilarityFromExpr(datExpr, power = 6)
# Raise the dissimilarity to a power and blank the diagonal before plotting.
plotTOM <- dissTOM^7
diag(plotTOM) <- NA
sizeGrWindow(9, 9)
TOMplot(plotTOM, geneTree, moduleColors, main = "Network heatmap plot, all genes")

# Restrict the heatmap to 400 randomly chosen genes.
# (Fixed: in the original, the statements below were fused onto comment lines,
# so none of them ran and `select` was undefined.)
nSelect <- 400
# Recomputed here so the block also works after reloading the saved .RData files.
nGenes <- ncol(datExpr)
# For reproducibility, we set the random seed
set.seed(10)
select <- sample(nGenes, size = nSelect)
selectTOM <- dissTOM[select, select]
# There's no simple way of restricting a clustering tree to a subset of genes,
# so we must re-cluster.
selectTree <- hclust(as.dist(selectTOM), method = "average")
selectColors <- moduleColors[select]
# Open a graphical window
sizeGrWindow(9, 9)
# Taking the dissimilarity to a power (7 here) makes the plot more informative
# by effectively changing the color palette; setting the diagonal to NA also
# improves the clarity of the plot
plotDiss <- selectTOM^7
diag(plotDiss) <- NA
TOMplot(plotDiss, selectTree, selectColors, main = "Network heatmap plot, selected genes")
5.2 可視化網絡圖中的eigengenes
# Section 5.2: cluster the modules by their eigengenes and draw the matching
# heatmap, together with the trait of interest.
# Recalculate module eigengenes (the original computed this twice in a row;
# once is enough).
MEs <- moduleEigengenes(datExpr, moduleColors)$eigengenes
# Isolate weight from the clinical traits.
# The original note says this only applies to continuous traits.
weight <- as.data.frame(datTraits$weight_g)
names(weight) <- "weight"
# Add the weight to the existing module eigengenes.
MET <- orderMEs(cbind(MEs, weight))

# Plot the relationships among the eigengenes and the trait: dendrogram and
# heatmap in one figure (the original drew this identical figure twice).
sizeGrWindow(5, 7.5)
par(cex = 0.9)
plotEigengeneNetworks(MET, "", marDendro = c(0, 4, 1, 2), marHeatmap = c(3, 4, 1, 2),
                      cex.lab = 0.8, xLabelsAngle = 90)

# The code below draws the two panels separately.
# Plot the dendrogram
sizeGrWindow(6, 6)
par(cex = 1.0)
# Module clustering dendrogram.
plotEigengeneNetworks(MET, "Eigengene dendrogram", marDendro = c(0, 4, 2, 0),
                      plotHeatmaps = FALSE)
# Plot the heatmap matrix (note: this plot will overwrite the dendrogram plot)
par(cex = 1.0)
# Trait and module heatmap.
plotEigengeneNetworks(MET, "Eigengene adjacency heatmap", marHeatmap = c(3, 4, 2, 2),
                      plotDendrograms = FALSE, xLabelsAngle = 90)

第六步:Exporting a gene network to external visualization software

6.0 構建R環境
#rm(list = ls())
#setwd('E:/gsj/RWD/WGCNA/')
#library(WGCNA)
#options(stringsAsFactors = FALSE)# this step must not be skipped
#enableWGCNAThreads()# lets WGCNA use multiple threads; per the original note, it can be skipped when working on a local machine
#lnames = load(file = "FemaleLiver-01-dataInput.RData")
# The above loads datExpr and datTraits produced in step one
#lnames = load(file = "FemaleLiver-02-networkConstruction-auto.RData");
#lnames
# The above loads the objects saved in step two
6.1 將數據導出VisANT
# Topological overlap for all genes, plus the probe annotation table.
TOM <- TOMsimilarityFromExpr(datExpr, power = 6)
annot <- read.csv(file = "GeneAnnotation.csv")
# Module to export.
module <- "brown"
# Probes that belong to the module.
probes <- names(datExpr)
inModule <- (moduleColors == module)
modProbes <- probes[inModule]
# Matching slice of the topological overlap matrix, labelled by probe.
modTOM <- TOM[inModule, inModule]
dimnames(modTOM) <- list(modProbes, modProbes)
# Export the whole module in a format VisANT can read.
vis <- exportNetworkToVisANT(
  modTOM,
  file = paste0("VisANTInput-", module, ".txt"),
  weighted = TRUE,
  threshold = 0,
  probeToGene = data.frame(annot$substanceBXH, annot$gene_symbol)
)

# The brown module is large, so also export only the 30 probes with the
# highest connectivity within the module.
nTop <- 30
IMConn <- softConnectivity(datExpr[, modProbes])
top <- (rank(-IMConn) <= nTop)
vis <- exportNetworkToVisANT(
  modTOM[top, top],
  file = paste0("VisANTInput-", module, "-top30.txt"),
  weighted = TRUE,
  threshold = 0,
  probeToGene = data.frame(annot$substanceBXH, annot$gene_symbol)
)
6.2 導出Cytoscape
# Topological overlap, recomputed in case it is not already in memory.
TOM <- TOMsimilarityFromExpr(datExpr, power = 6)
# Probe annotation table.
annot <- read.csv(file = "GeneAnnotation.csv")
# Modules to export.
modules <- c("brown", "red")
# Probes that fall into any of the chosen modules, and their gene symbols.
probes <- names(datExpr)
inModule <- moduleColors %in% modules
modProbes <- probes[inModule]
modGenes <- annot$gene_symbol[match(modProbes, annot$substanceBXH)]
# Matching slice of the topological overlap matrix, labelled by probe.
modTOM <- TOM[inModule, inModule]
dimnames(modTOM) <- list(modProbes, modProbes)
# Write edge and node list files that Cytoscape can read.
cyt <- exportNetworkToCytoscape(
  modTOM,
  edgeFile = paste0("CytoscapeInput-edges-", paste(modules, collapse = "-"), ".txt"),
  nodeFile = paste0("CytoscapeInput-nodes-", paste(modules, collapse = "-"), ".txt"),
  weighted = TRUE,
  threshold = 0.02,
  nodeNames = modProbes,
  altNodeNames = modGenes,
  nodeAttr = moduleColors[inModule]
)
更多精彩內容下載簡書APP
"小禮物走一走,來簡書關注我"
還沒有人贊賞,支持一下
總資產8 (約0.59元)共寫了3163字獲得108個贊共111個粉絲

    本站是提供個人知識管理的網絡存儲空間,所有內容均由用戶發布,不代表本站觀點。請注意甄別內容中的聯系方式、誘導購買等信息,謹防詐騙。如發現有害或侵權內容,請點擊一鍵舉報。
    轉藏 分享 獻花(0

    0條評論

    發表

    請遵守用戶 評論公約

    類似文章 更多