一次完全失敗的嘗試
COMPLETE FAILURE

以 HTML table 形式呈現(xiàn)芯片數(shù)據(jù)的好處是可以加上各個(gè)公共注釋數(shù)據(jù)庫的鏈接，便于下游分析，而且小巧便捷，用瀏覽器可以直接打開。

制作這樣的 HTML table 的先決條件是要有所用芯片的注釋信息，當(dāng)然制造商一般會(huì)提供可導(dǎo)入 R 的文件格式，例如用 read.csv() 就可以加載 Affymetrix 的 csv 文件。

1. 加載包&數(shù)據(jù)準(zhǔn)備

以 Affymetrix HG-U95Av2 芯片為例，隨機(jī)取15個(gè)基因：

library(annotate)
data("sample.ExpressionSet")
igenes <- featureNames(sample.ExpressionSet)[246:260]

2. 準(zhǔn)備注釋數(shù)據(jù)

library(hgu95av2.db)

## GenBank ID
gb <- as.character(unlist(lookUp(igenes, "hgu95av2", "ACCNUM")))
gb
#  [1] "M57423"   "Z70218"   "L17328"   "S81916"   "U63332"   "M77235"  
#  [7] "X98175"   "AB019392" "J03071"   "D25272"   "D63789"   "D63789"  
# [13] "U19142"   "U19147"   "X16863"

## LocusLink ID
ll <- as.character(unlist(lookUp(igenes, "hgu95av2", "ENTREZID")))
ll
#  [1] "221823" "4330"   "9637"   "5230"   "4287"   "6331"   "841"    "27335" 
#  [9] NA       NA       "6846"   NA       NA       NA       "2215"

## UniGene ID
ug <- as.character(unlist(lookUp(igenes, "hgu95av2", "UNIGENE")))
ug
#  [1] "Hs.169284" "Hs.268515" "Hs.258563" "Hs.78771"  "Hs.532632" "Hs.517898"
#  [7] "Hs.599762" "Hs.314359" NA          NA          "Hs.458346" NA         
# [13] NA          NA          "Hs.372679" "Hs.694258" "Hs.736230"

## SwissProt ID 一個(gè)探針對(duì)應(yīng)多個(gè)UNIPROT ID，保留為list格式
sp <- lookUp(igenes, "hgu95av2", "UNIPROT")
names(sp) <- NULL
sp
# [[1]]
# [1] "P21108"
# 
# [[2]]
# [1] "A0A024R1C3" "Q10571"    
# 
# [[3]]
# [1] "Q9UHY8"
# 
# [[4]]
# [1] "P00558" "V9HWF4"
# 
# [[5]]
# [1] "P54252"     "A0A0A0MS38" "C9JQV6"    
# 
# [[6]]
# [1] "Q14524"     "Q86V90"     "H9KVD2"     "E9PG18"     "E9PHB6"    
# [6] "K4DIA1"     "A0A0A0MT39"
# 
# [[7]]
# [1] "A0A024R3Z8" "Q14790"    
# 
# [[8]]
# [1] "K7ERF1" "U3LUI4" "K7EK53" "Q9UBQ5"
# 
# [[9]]
# [1] NA
# 
# [[10]]
# [1] NA
# 
# [[11]]
# [1] "Q9UBD3"
# 
# [[12]]
# [1] NA
# 
# [[13]]
# [1] NA
# 
# [[14]]
# [1] NA
# 
# [[15]]
# [1] "O75015"     "M9MML6"     "A0A087WZR4" "A0A087WU90"

需要把 NA 轉(zhuǎn)換為 " &nbsp ; "，原因嘛，我也說不清....

Because the code for SwissProt IDs will not automatically handle missing data, we have to convert the missing data to an HTML empty cell identifier ("?")

## 寫個(gè)循環(huán)改一下
for(i in 1:length(names)) {
  if(i == 9 || i == 10 || i ==12 ||
     i ==13 || i == 14)
  names[i] <- "&nbsp;"
}

sp
# [[1]]
# [1] "P21108"
# 
# [[2]]
# [1] "A0A024R1C3" "Q10571"    
# 
# [[3]]
# [1] "Q9UHY8"
# 
# [[4]]
# [1] "P00558" "V9HWF4"
# 
# [[5]]
# [1] "P54252"     "A0A0A0MS38" "C9JQV6"    
# 
# [[6]]
# [1] "Q14524"     "Q86V90"     "H9KVD2"     "E9PG18"     "E9PHB6"    
# [6] "K4DIA1"     "A0A0A0MT39"
# 
# [[7]]
# [1] "A0A024R3Z8" "Q14790"    
# 
# [[8]]
# [1] "K7ERF1" "U3LUI4" "K7EK53" "Q9UBQ5"
# 
# [[9]]
# [1] "&nbsp;"
# 
# [[10]]
# [1] "&nbsp;"
# 
# [[11]]
# [1] "Q9UBD3"
# 
# [[12]]
# [1] "&nbsp;"
# 
# [[13]]
# [1] "&nbsp;"
# 
# [[14]]
# [1] "&nbsp;"
# 
# [[15]]
# [1] "O75015"     "M9MML6"     "A0A087WZR4" "A0A087WU90"

## Gene Name
names <- as.character(unlist(lookUp(igenes, "hgu95av2", "GENENAME")))
names
#  [1] "phosphoribosyl pyrophosphate synthetase 1 like 1"    
#  [2] "MN1 proto-oncogene, transcriptional regulator"       
#  [3] "fasciculation and elongation protein zeta 2"         
#  [4] "phosphoglycerate kinase 1"                           
#  [5] "ataxin 3"                                            
#  [6] "sodium voltage-gated channel alpha subunit 5"        
#  [7] "caspase 8"                                           
#  [8] "eukaryotic translation initiation factor 3 subunit K"
#  [9] NA                                                    
# [10] NA                                                    
# [11] "X-C motif chemokine ligand 2"                        
# [12] NA                                                    
# [13] NA                                                    
# [14] NA                                                    
# [15] "Fc fragment of IgG receptor IIIb"    
for(i in 1:length(names)) {
  if(i == 9 || i == 10 || i ==12 ||
     i ==13 || i == 14)
  names[i] <- "&nbsp;"
}
names
#  [1] "phosphoribosyl pyrophosphate synthetase 1 like 1"    
#  [2] "MN1 proto-oncogene, transcriptional regulator"       
#  [3] "fasciculation and elongation protein zeta 2"         
#  [4] "phosphoglycerate kinase 1"                           
#  [5] "ataxin 3"                                            
#  [6] "sodium voltage-gated channel alpha subunit 5"        
#  [7] "caspase 8"                                           
#  [8] "eukaryotic translation initiation factor 3 subunit K"
#  [9] "&nbsp;"                                               
# [10] "&nbsp;"                                               
# [11] "X-C motif chemokine ligand 2"                        
# [12] "&nbsp;"                                               
# [13] "&nbsp;"                                               
# [14] "&nbsp;"                                               
# [15] "Fc fragment of IgG receptor IIIb"

3. 一些相關(guān)的值

取前10個(gè)基因作為例子：

dat <- exprs(sample.ExpressionSet)[igenes,1:10]
FC <- rowMeans(dat[igenes,1:5]) - rowMeans(dat[igenes,6:10]) 
pval <- esApply(sample.ExpressionSet[igenes,1:10], 1, function(x) t.test(x[1:5], x[6:10])$p.value)
tstat <- esApply(sample.ExpressionSet[igenes,1:10], 1, function(x) t.test(x[1:5], x[6:10])$statistic)

4. 構(gòu)建 HTML table

genelist <- list(igenes, ug, ll, gb, sp)
filename <- "Interesting_genes.html" 
title <- "An Artificial Set of Interesting Genes" 
othernames <- list(names, round(tstat, 2), round(pval, 3), round(FC, 1), round(dat, 2))
head <- c("Probe ID", "UniGene", "LocusLink", "GenBank", "SwissProt", "Gene Name", "t-statistic", "p-value",
          "Fold Change", "Sample 1", "Sample 2", "Sample 3", "Sample 4", "Sample 5", "Sample 6",
          "Sample 7", "Sample 8", "Sample 9", "Sample 10")
repository <- list("affy", "ug", "en", "gb", "sp") 
htmlpage(genelist, filename, title, othernames, head, repository = repository)

然而很絕望....

所以是為什么呢，一步一步和作者的代碼對(duì)照，只能發(fā)現(xiàn)??

區(qū)別主要就是這么個(gè)區(qū)別，于是只好再操作一波。

for(i in 1:15) {
  if(i == 9 || i == 10 || i ==12 ||
     i ==13 || i == 14)
    ll[i] <- "---"
}
for(i in 1:15) {
  if(i == 9 || i == 10 || i ==12 ||
     i ==13 || i == 14)
    ug[i] <- "---"
}

ll
#  [1] "221823" "4330"   "9637"   "5230"   "4287"   "6331"   "841"    "27335"  "---"   
# [10] "---"    "6846"   "---"    "---"    "---"    "2215"

ug
#  [1] "Hs.169284" "Hs.268515" "Hs.258563" "Hs.78771"  "Hs.532632" "Hs.517898"
#  [7] "Hs.599762" "Hs.314359" "---"       "---"       "Hs.458346" "---"      
# [13] "---"       "---"       "Hs.372679" "Hs.694258" "Hs.736230"

還是不行....一毛一樣的報(bào)錯(cuò)

絕望中打開了作者的 R Script

所以其實(shí)都是這樣來的嗎??

那么，如果，按照這些代碼從頭來一遍會(huì)怎樣呢？

fine 果然成功了。
而且也沒多么好看嘛，作者原話是 nice looking&pretty

“已經(jīng)折騰了半天就不能白折騰”

現(xiàn)在要開始做一些也許毫無意義的排錯(cuò)。

> gb_t <- as.character(unlist(lookUp(igenes, "hgu95av2", "ACCNUM")))
> gb
 [1] "M57423"   "Z70218"   "L17328"   "S81916"   "U63332"  
 [6] "M77235"   "X98175"   "AB019392" "J03071"   "D25272"  
[11] "D63789"   "D63789"   "U19142"   "U19147"   "X16863"  
> gb_t
 [1] "M57423"   "Z70218"   "L17328"   "S81916"   "U63332"  
 [6] "M77235"   "X98175"   "AB019392" "J03071"   "D25272"  
[11] "D63789"   "D63789"   "U19142"   "U19147"   "X16863"  
> class(gb)
[1] "character"
> class(gb_t)
[1] "character"

GenBank ID clear

> ll_t <- as.character(unlist(lookUp(igenes, "hgu95av2", "ENTREZID")))
> ll
 [1] "221823" "4330"   "9637"   "---"    "---"    "6331"   "841"   
 [8] "27335"  "---"    "---"    "6375"   "---"    "2543"   "2578"  
[15] "2215"  
> ll_t
 [1] "221823" "4330"   "9637"   "5230"   "4287"   "6331"   "841"   
 [8] "27335"  NA       NA       "6846"   NA       NA       NA      
[15] "2215"  
> for(i in 1:15) {
+   if(i == 9 || i == 10 || i ==12 ||
+      i ==13 || i == 14)
+     ll_t[i] <- "---"
+ }
> ll_t
 [1] "221823" "4330"   "9637"   "5230"   "4287"   "6331"   "841"   
 [8] "27335"  "---"    "---"    "6846"   "---"    "---"    "---"   
[15] "2215" 
> class(ll)
[1] "character"
> class(ll_t)
[1] "character"

LocusLink ID clear

> ug_t <- as.character(unlist(lookUp(igenes, "hgu95av2", "UNIGENE")))
> ug
 [1] "Hs.169284" "Hs.268515" "Hs.103419" "Hs.380429" "---"      
 [6] "Hs.169331" "Hs.381231" "Hs.283781" "---"       "---"      
[11] "Hs.3195"   "---"       "Hs.176660" "Hs.272484" "Hs.372679"
> ug_t
 [1] "Hs.169284" "Hs.268515" "Hs.258563" "Hs.78771"  "Hs.532632"
 [6] "Hs.517898" "Hs.599762" "Hs.314359" NA          NA         
[11] "Hs.458346" NA          NA          NA          "Hs.372679"
[16] "Hs.694258" "Hs.736230"
> for(i in 1:15) {
+   if(i == 9 || i == 10 || i ==12 ||
+      i ==13 || i == 14)
+     ug_t[i] <- "---"
+ }
> ug_t
 [1] "Hs.169284" "Hs.268515" "Hs.258563" "Hs.78771"  "Hs.532632"
 [6] "Hs.517898" "Hs.599762" "Hs.314359" "---"       "---"      
[11] "Hs.458346" "---"       "---"       "---"       "Hs.372679"
[16] "Hs.694258" "Hs.736230"
> class(ug)
[1] "character"
> class(ug_t)
[1] "character"

UniGene ID clear

> sp_t <- lookUp(igenes, "hgu95av2", "UNIPROT")
> sp_t
$`31485_at`
[1] "P21108"

$`31486_s_at`
[1] "A0A024R1C3" "Q10571"    

$`31487_at`
[1] "Q9UHY8"

$`31488_s_at`
[1] "P00558" "V9HWF4"

$`31489_at`
[1] "P54252"     "A0A0A0MS38" "C9JQV6"    

$`31490_at`
[1] "Q14524"     "Q86V90"     "H9KVD2"     "E9PG18"     "E9PHB6"    
[6] "K4DIA1"     "A0A0A0MT39"

$`31491_s_at`
[1] "A0A024R3Z8" "Q14790"    

$`31492_at`
[1] "K7ERF1" "U3LUI4" "K7EK53" "Q9UBQ5"

$`31493_s_at`
[1] NA

$`31494_at`
[1] NA

$`31495_at`
[1] "Q9UBD3"

$`31496_g_at`
[1] NA

$`31497_at`
[1] NA

$`31498_f_at`
[1] NA

$`31499_s_at`
[1] "O75015"     "M9MML6"     "A0A087WZR4" "A0A087WU90"

> names(sp_t) <- NULL
> for(i in 1:15) {
+   if(i == 9 || i == 10 || i ==12 ||
+      i ==13 || i == 14)
+     sp_t[[i]][1] <- "&nbsp;"
+ }
> sp
[[1]]
[1] "P21108"

[[2]]
[1] "Q10571"

[[3]]
[1] "Q9UHY8"

[[4]]
[1] "Q16444"

[[5]]
[1] "&nbsp;"

[[6]]
[1] "Q14524 "  " Q8IZC9 " " Q8WTQ6 " " Q8WWN5 " " Q96J69" 

[[7]]
[1] "Q14790"

[[8]]
[1] "Q9UBQ5"

[[9]]
[1] "&nbsp;"

[[10]]
[1] "&nbsp;"

[[11]]
[1] "P47992"

[[12]]
[1] "&nbsp;"

[[13]]
[1] "Q13065 " " Q8IYC5"

[[14]]
[1] "Q13070"

[[15]]
[1] "O75015"

> sp_t
[[1]]
[1] "P21108"

[[2]]
[1] "A0A024R1C3" "Q10571"    

[[3]]
[1] "Q9UHY8"

[[4]]
[1] "P00558" "V9HWF4"

[[5]]
[1] "P54252"     "A0A0A0MS38" "C9JQV6"    

[[6]]
[1] "Q14524"     "Q86V90"     "H9KVD2"     "E9PG18"     "E9PHB6"    
[6] "K4DIA1"     "A0A0A0MT39"

[[7]]
[1] "A0A024R3Z8" "Q14790"    

[[8]]
[1] "K7ERF1" "U3LUI4" "K7EK53" "Q9UBQ5"

[[9]]
[1] "&nbsp;"

[[10]]
[1] "&nbsp;"

[[11]]
[1] "Q9UBD3"

[[12]]
[1] "&nbsp;"

[[13]]
[1] "&nbsp;"

[[14]]
[1] "&nbsp;"

[[15]]
[1] "O75015"     "M9MML6"     "A0A087WZR4" "A0A087WU90"

> class(sp)
[1] "list"
> class(sp_t)
[1] "list"

SwissProt ID clear

> name_t <- as.character(unlist(lookUp(igenes, "hgu95av2", "GENENAME")))
> name_t
 [1] "phosphoribosyl pyrophosphate synthetase 1 like 1"    
 [2] "MN1 proto-oncogene, transcriptional regulator"       
 [3] "fasciculation and elongation protein zeta 2"         
 [4] "phosphoglycerate kinase 1"                           
 [5] "ataxin 3"                                            
 [6] "sodium voltage-gated channel alpha subunit 5"        
 [7] "caspase 8"                                           
 [8] "eukaryotic translation initiation factor 3 subunit K"
 [9] NA                                                    
[10] NA                                                    
[11] "X-C motif chemokine ligand 2"                        
[12] NA                                                    
[13] NA                                                    
[14] NA                                                    
[15] "Fc fragment of IgG receptor IIIb"   
> for(i in 1:length(names)) {
+   if(i == 9 || i == 10 || i ==12 ||
+      i ==13 || i == 14)
+     name_t[i] <- "&nbsp;"
+ }
> name
 [1] "hypothetical protein LOC221823"                            
 [2] "meningioma (disrupted in balanced translocation) 1"        
 [3] "fasciculation and elongation protein zeta 2 (zygin II)"    
 [4] "Phosphoglycerate kinase {alternatively spliced}"           
 [5] "&nbsp;"                                                    
 [6] "sodium channel, voltage-gated, type V, alpha polypeptide"  
 [7] "caspase 8, apoptosis-related cysteine protease"            
 [8] "muscle specific gene"                                      
 [9] "&nbsp;"                                                    
[10] "&nbsp;"                                                    
[11] "chemokine (C motif) ligand 1"                              
[12] "&nbsp;"                                                    
[13] "G antigen 1"                                               
[14] "G antigen 6"                                               
[15] "Fc fragment of IgG, low affinity IIIb, receptor for (CD16)"
> name_t
 [1] "phosphoribosyl pyrophosphate synthetase 1 like 1"    
 [2] "MN1 proto-oncogene, transcriptional regulator"       
 [3] "fasciculation and elongation protein zeta 2"         
 [4] "phosphoglycerate kinase 1"                           
 [5] "ataxin 3"                                            
 [6] "sodium voltage-gated channel alpha subunit 5"        
 [7] "caspase 8"                                           
 [8] "eukaryotic translation initiation factor 3 subunit K"
 [9] "&nbsp;"                                              
[10] "&nbsp;"                                              
[11] "X-C motif chemokine ligand 2"                        
[12] "&nbsp;"                                              
[13] "&nbsp;"                                              
[14] "&nbsp;"                                              
[15] "Fc fragment of IgG receptor IIIb"  
> class(name)
[1] "character"
> class(name_t)
[1] "character"

Gene Name clear

## 這些都是一毛一樣的
dat <- exprs(sample.ExpressionSet)[igenes,1:10]
FC <- rowMeans(dat[igenes,1:5]) - rowMeans(dat[igenes,6:10]) 
pval <- esApply(sample.ExpressionSet[igenes,1:10], 1, function(x) t.test(x[1:5], x[6:10])$p.value)
tstat <- esApply(sample.ExpressionSet[igenes,1:10], 1, function(x) t.test(x[1:5], x[6:10])$statistic)

## 剩下的都對(duì)應(yīng)地加上"_t"
genelist_t <- list(igenes, ug_t, ll_t, gb_t, sp_t)
filename_t <- "Interesting_genes_t.html"
title_t <- "An Artificial Set of Interesting Genes"
othernames_t <- list(name_t, round(tstat, 2), round(pval, 3), round(FC, 1), round(dat, 2))
head_t <- c("Probe ID", "UniGene", "LocusLink", "GenBank", "SwissProt", "Gene Name", "t-statistic", "p-value",
          "Fold Change", "Sample 1", "Sample 2", "Sample 3", "Sample 4", "Sample 5", "Sample 6",
          "Sample 7", "Sample 8", "Sample 9", "Sample 10")
repository_t <- list("affy", "ug", "en", "gb", "sp")
htmlpage(genelist_t, filename_t, title_t, othernames_t, head_t, repository = repository_t)

行吧。

最后，向大家隆重推薦生信技能樹的一系列干貨！

生信技能樹全球公益巡講：https://mp.weixin.qq.com/s/E9ykuIbc-2Ja9HOY0bn_6g
B站公益74小時(shí)生信工程師教學(xué)視頻合輯：https://mp.weixin.qq.com/s/IyFK7l_WBAiUgqQi8O7Hxw
招學(xué)徒：https://mp.weixin.qq.com/s/KgbilzXnFjbKKunuw7NVfw

色偷偷精品伊人,欧洲久久精品,欧美综合婷婷骚逼,国产AV主播,国产最新探花在线,九色在线视频一区,伊人大交九欧美,1769亚洲,黄色成人av

annotate: 把芯片數(shù)據(jù)導(dǎo)出為好看的 HTML

annotate: 把芯片數(shù)據(jù)導(dǎo)出為好看的 HTML

1. 加載包&數(shù)據(jù)準(zhǔn)備

2. 準(zhǔn)備注釋數(shù)據(jù)

3. 一些相關(guān)的值

4. 構(gòu)建 HTML table

最后，向大家隆重推薦生信技能樹的一系列干貨！

友情鏈接更多精彩內(nèi)容

色偷偷精品伊人,欧洲久久精品,欧美综合婷婷骚逼,国产AV主播,国产最新探花在线,九色在线视频一区,伊人大交九 欧美,1769亚洲,黄色成人av

annotate: 把芯片數(shù)據(jù)導(dǎo)出為好看的 HTML

1. 加載包&數(shù)據(jù)準(zhǔn)備

2. 準(zhǔn)備注釋數(shù)據(jù)

3. 一些相關(guān)的值

4. 構(gòu)建 HTML table

最后，向大家隆重推薦生信技能樹的一系列干貨！

友情鏈接更多精彩內(nèi)容

色偷偷精品伊人,欧洲久久精品,欧美综合婷婷骚逼,国产AV主播,国产最新探花在线,九色在线视频一区,伊人大交九欧美,1769亚洲,黄色成人av

最后，向大家隆重推薦生信技能樹的一系列干貨！