-
group_list分組
第一步:清除之前所有變量+加載之前的數據
> rm(list = ls()) #表示清除所有變量;ls()把當前環境中的對象名賦值給list,接著清除
#ls() 返回global environment 里面的所有object的名字。
#是一個(gè)character vector
> load(file = "step1output.Rdata")#加載工作目錄下之前保存的數據
> library(stringr)#加載stringr包
第二步,確認分組的目標
#前文提到的pd中有臨床信息,其中title中顯示了control組和實驗組
> pd$title
[1] "A375 cells 24h Control rep1" "A375 cells 24h Control rep2"
[3] "A375 cells 24h Control rep3" "A375 cells 24h Vemurafenib rep1"
[5] "A375 cells 24h Vemurafenib rep2" "A375 cells 24h Vemurafenib rep3"

pd
第三步,分組向量生成
> group_list=c(rep("control",times=3),rep("treat",times=3))
> group_list
[1] "control" "control" "control" "treat" "treat" "treat"
> #第三類,ifelse
> library(stringr)#這個包可以用函數str_detect()
> group_list=ifelse(str_detect(pd$title,"Control"),"control","treat")
> group_list
[1] "control" "control" "control" "treat" "treat" "treat"
#ifelse()的第一個參數為判斷條件,第二個為條件為TRUE時的返回值,第三個為條件為FALSE時的返回值
#設置參考水平,對照在前,處理在後
#str_detect(string字符串, pattern匹配字符),返回邏輯值,是檢測函數;
#用於檢測字符串中是否存在某種匹配模式;
#val <- c("abca4", 123, "cba2");str_detect(val, "a")檢查val中每個元素是否含有字符a;返回TRUE FALSE TRUE
#pd$title中有6個元素,返回6個邏輯值;TRUE時返回"control",FALSE時返回"treat"
第四步,設置因子
> group_list = factor(group_list,#生成因子的意義,後面的差異分析是處理/對照
levels = c("control","treat"))
#levels規定誰在前面誰是對照,注意順序,所以要用levels
#芯片注釋,查找芯片平臺對應的包,到此腳本中替換
-
芯片注釋
芯片注釋,查找芯片平臺對應的包,到此腳本中替換
gpl #去網頁搜索GPL編號,ctrl+F,獲取相應的注釋包
http://www.bio-info-trainee.com/1399.html
芯片探針與基因的對應關係:http://www.bio-info-trainee.com/1399.html

image.png
第一步,安裝並加載hugene10sttranscriptcluster.db包
> gpl #去網頁搜索GPL編號,ctrl+F,獲取相應的注釋包
[1] "GPL6244"
>if(!require(hugene10sttranscriptcluster.db))BiocManager::install("hugene10sttranscriptcluster.db")
#require()表示加載,返回的是邏輯值,TRUE時表示已加載,FALSE表示未加載;!表示否定
#若未安裝則先安裝;ls("package:包名")可列出包中的內容,例如ls("package:tidyr")
> library(hugene10sttranscriptcluster.db)
> ls("package:hugene10sttranscriptcluster.db")#顯示包裡的所有對象
[1] "hugene10sttranscriptcluster"
[2] "hugene10sttranscriptcluster.db"
[3] "hugene10sttranscriptcluster_dbconn"
[4] "hugene10sttranscriptcluster_dbfile"
[5] "hugene10sttranscriptcluster_dbInfo"
[6] "hugene10sttranscriptcluster_dbschema"
[7] "hugene10sttranscriptclusterACCNUM"
[8] "hugene10sttranscriptclusterALIAS2PROBE"
[9] "hugene10sttranscriptclusterCHR"
[10] "hugene10sttranscriptclusterCHRLENGTHS"
[11] "hugene10sttranscriptclusterCHRLOC"
[12] "hugene10sttranscriptclusterCHRLOCEND"
[13] "hugene10sttranscriptclusterENSEMBL"
[14] "hugene10sttranscriptclusterENSEMBL2PROBE"
[15] "hugene10sttranscriptclusterENTREZID"
[16] "hugene10sttranscriptclusterENZYME"
[17] "hugene10sttranscriptclusterENZYME2PROBE"
[18] "hugene10sttranscriptclusterGENENAME"
[19] "hugene10sttranscriptclusterGO"
[20] "hugene10sttranscriptclusterGO2ALLPROBES"
[21] "hugene10sttranscriptclusterGO2PROBE"
[22] "hugene10sttranscriptclusterMAP"
[23] "hugene10sttranscriptclusterMAPCOUNTS"
[24] "hugene10sttranscriptclusterOMIM"
[25] "hugene10sttranscriptclusterORGANISM"
[26] "hugene10sttranscriptclusterORGPKG"
[27] "hugene10sttranscriptclusterPATH"
[28] "hugene10sttranscriptclusterPATH2PROBE"
[29] "hugene10sttranscriptclusterPFAM"
[30] "hugene10sttranscriptclusterPMID"
[31] "hugene10sttranscriptclusterPMID2PROBE"
[32] "hugene10sttranscriptclusterPROSITE"
[33] "hugene10sttranscriptclusterREFSEQ"
[34] "hugene10sttranscriptclusterSYMBOL" ###重要
[35] "hugene10sttranscriptclusterUNIGENE"
[36] "hugene10sttranscriptclusterUNIPROT"
#View(hugene10sttranscriptclusterSYMBOL)
#str(hugene10sttranscriptclusterSYMBOL)
#View(hugene10sttranscriptclusterSYMBOL)
第二步,將hugene10sttranscriptclusterSYMBOL中的數據用數據框封裝
> ids <- toTable(hugene10sttranscriptclusterSYMBOL)#把包裡的數據變成數據框
#toTable是一種能夠以數據框的形式來操作一個Bimap對象的方法,
#也就是把Bimap對象轉換為一個數據框,
#這些方法是Bimap interface方法的一部分。
#Bimap指的是一種映射關係,例如探針的編號與基因名稱之間的映射
head(ids)#只有兩列數據probe_id和symbol
probe_id symbol
1 7896759 LINC01128
2 7896761 SAMD11
3 7896779 KLHL17
4 7896798 PLEKHN1
5 7896817 ISG15
6 7896822 AGRN
#View(ids)
save(exp,group_list,ids,file = "step2output.Rdata")
繼續了解probe_id和symbol在該分析中的作用