[5f3b2d]: / part4_extra_requests_by_reviewers.Rmd

Download this file

142 lines (128 with data), 7.0 kB

---
title: "part4_extra_requests_by_reviewers"
author: "Gargya Péter"
date: '2021 augusztus 19 '
output: html_document
---

```{r setup, include=FALSE}
# Default chunk option: echo each chunk's source code in the rendered output.
knitr::opts_chunk$set(echo = TRUE)
```

Relationship between FIGO stage and our 12 most important transcripts
```{r}
# NOTE(review): hard-coded absolute working directory; this chunk only runs
# on a machine where this path exists.
setwd("E:/Egyetem/TDK/TCGA_uterus")
library(dplyr)

# Load the clinical table and narrow it down step by step. The row count is
# printed after each filter so the attrition is visible in the report.
original <- read.delim("ucec_tcga_clinical_data.tsv")
print(nrow(original))

# Convert every column to character so the string comparisons below behave
# the same way for all columns.
original[] <- lapply(original, as.character)

# Sample IDs are matched against dotted identifiers later on, so replace "-"
# with "." here.
original$Sample.ID <- gsub("-", ".", original$Sample.ID)

# Keep samples whose ID has "01" at characters 14-15.
# NOTE(review): presumably the TCGA sample-type code for primary tumour;
# confirm.
original <- filter(original, substr(Sample.ID, 14, 15) == "01")
print(nrow(original))

# Recode "High Grade" as "G3". Vectorised with %in%, so missing grades and an
# empty table are handled without error (the former element-wise `if` loop
# stopped on NA and on zero rows).
is_high_grade <- original$Neoplasm.Histologic.Grade %in% "High Grade"
original$Neoplasm.Histologic.Grade[is_high_grade] <- "G3"

original <- filter(
  original,
  Neoplasm.Histologic.Type.Name == "Endometrioid endometrial adenocarcinoma"
)
print(nrow(original))
original <- filter(
  original,
  Neoadjuvant.Therapy.Type.Administered.Prior.To.Resection.Text == "No"
)
print(nrow(original))
table(original$Neoplasm.Histologic.Grade)

# Collapse stage sub-categories (IA, IB, ...) into their main stage.
original$stage <-
  original$Neoplasm.American.Joint.Committee.on.Cancer.Clinical.Group.Stage
stage_groups <- list(
  "Stage I" = c("Stage IA", "Stage IB", "Stage IC"),
  "Stage II" = c("Stage IIA", "Stage IIB"),
  "Stage III" = c(
    "Stage IIIA", "Stage IIIB", "Stage IIIC", "Stage IIIC1", "Stage IIIC2"
  ),
  "Stage IV" = c("Stage IVA", "Stage IVB")
)
for (main_stage in names(stage_groups)) {
  in_group <- original$stage %in% stage_groups[[main_stage]]
  original$stage[in_group] <- main_stage
}

# Keep the four-level stage, then reduce to a two-level variable.
# NOTE(review): every non-missing value other than "Stage I" becomes
# "advanced", including blank or unrecognised stage strings if the file
# contains any; check table(original$stage_copy).
original$stage_copy <- original$stage
original$stage <- ifelse(original$stage == "Stage I", "local", "advanced")

# Raw count table: column X1 holds the gene identifiers, the other columns are
# samples. Gene identifiers and sample column names are both cut to their
# first 15 characters.
rnaseq <- read.delim("tcgaBiolinks_uterus_rnaseq_raw.txt")
rownames(rnaseq) <- substr(rnaseq$X1, 1, 15)
rnaseq$X1 <- NULL
names(rnaseq) <- substr(names(rnaseq), 1, 15)

# Transpose to samples x genes so samples can be filtered and sorted as rows.
rnaseq <- as.data.frame(t(rnaseq))
rnaseq$sample <- rownames(rnaseq)
rnaseq <- filter(rnaseq, sample %in% original$Sample.ID)
rnaseq <- filter(rnaseq, !duplicated(sample))

# Restrict the clinical table to samples with expression data and put both
# tables in the same order.
original <- filter(original, Sample.ID %in% rnaseq$sample)
original <- arrange(original, Sample.ID)
rnaseq <- arrange(rnaseq, sample)
# Set the row names after the last dplyr verb so the sample IDs reliably
# become the column names of the count matrix below.
rownames(rnaseq) <- rnaseq$sample

# The clinical rows and the expression rows must describe the same samples in
# the same order; stop instead of only printing the result.
samples_aligned <- nrow(original) == nrow(rnaseq) &&
  all(original$Sample.ID == rnaseq$sample)
print(samples_aligned)
if (!samples_aligned) {
  stop("Clinical table and RNA-seq table are not aligned by sample ID.",
       call. = FALSE)
}

# Back to genes x samples for DESeq2.
rnaseq$sample <- NULL
rnaseq <- as.data.frame(t(rnaseq))

# Sample annotation. The stage levels are spelled out in the alphabetical
# order a character column would be converted to, so "advanced" stays the
# reference level and the default contrast is local vs advanced.
coldata <- data.frame(
  grade = original$Neoplasm.Histologic.Grade,
  stage = factor(original$stage, levels = c("advanced", "local")),
  row.names = colnames(rnaseq)
)
cts <- as.matrix(rnaseq)
library("DESeq2")
library("BiocParallel")

# Differential expression with stage as the only design variable.
ddsTrain <- DESeqDataSetFromMatrix(
  countData = cts,
  colData = coldata,
  design = ~stage
)

# Drop genes whose mean raw count across samples is 4 or lower.
keep <- rowMeans(counts(ddsTrain)) > 4
ddsTrain <- ddsTrain[keep, ]
dim(ddsTrain)

# NOTE(review): MulticoreParam relies on forked processes, which the
# BiocParallel documentation describes as unavailable on Windows; the working
# directory used in this chunk is a Windows drive path, so confirm this
# actually runs in parallel there.
dds <- DESeq(ddsTrain, parallel = TRUE, BPPARAM = MulticoreParam(15))
res <- results(dds)

# Write the MA plot to a PDF and close the device so the file is finalised.
pdf("MAplot.pdf", width = 10, height = 10)
DESeq2::plotMA(res, ylim = c(-2, 2))
invisible(dev.off())

# Tab-separated results table with the gene identifiers as row names.
write.table(
  as.data.frame(res),
  file = "stage__uterus_deseq2_results.txt",
  sep = "\t",
  row.names = TRUE,
  col.names = TRUE,
  quote = FALSE
)
```
```{r}
# NOTE(review): hard-coded absolute working directory; this chunk only runs
# on a machine where this path exists.
setwd("E:/Egyetem/TDK/TCGA_uterus")
library(dplyr)

# Read the DESeq2 results back in (gene identifiers arrive as row names) and
# keep genes with adjusted p-value below 0.05. filter() also drops rows whose
# padj is NA.
df <- read.delim("stage__uterus_deseq2_results.txt")
df$gene <- rownames(df)
df <- filter(df, padj < 0.05)

library(mygene)
# Translate the gene identifiers to gene symbols.
asd <- queryMany(
  df$gene,
  scopes = "ensembl.gene",
  fields = c("uniprot", "symbol", "reporter"),
  species = "human"
)
# The lookup result is not guaranteed to have exactly one row per query in
# input order (a query can return several hits), so map each identifier
# through the `query` column instead of copying the column by position.
# Identifiers without a hit get NA.
df$gene <- asd$symbol[match(df$gene, asd$query)]

# Transcripts of interest; repeated names are harmless for %in%.
# NOTE(review): the vector holds 13 distinct names (three are listed twice),
# while the section heading speaks of 12 transcripts; confirm the list.
genes <- c("DKK4", "MAL", "MLF1", "RPL41P1", "RPS28P7", "B3GAT1-DT", "FOXB1",
           "UCHL1", "CRABP1", "LOC283177", "PEG10", "HABP2", "EDN3",
           "LOC283177", "MLF1", "DKK4")
# Namespace-qualified so the call is unaffected by anything mygene and its
# dependencies attach after dplyr.
df2 <- dplyr::filter(df, gene %in% genes)
dim(df2)
```

TCGA subgroups and our 12 transcripts
```{r}
# NOTE(review): hard-coded absolute working directory; this chunk only runs
# on a machine where this path exists. An alternative location is kept
# commented out below.
setwd("E:/Egyetem/TDK/TCGA_uterus")
#setwd('/data10/working_groups/balint_group/gargya.peter/R/uterus/')
library(dplyr)

# Load the clinical table and convert every column to character so the string
# comparisons below behave the same way for all columns.
original <- read.delim("ucec_tcga_clinical_data.tsv")
original[] <- lapply(original, as.character)

# Sample IDs are matched against dotted identifiers later on, so replace "-"
# with "." here.
original$Sample.ID <- gsub("-", ".", original$Sample.ID)

# Keep samples whose ID has "01" at characters 14-15.
# NOTE(review): presumably the TCGA sample-type code for primary tumour;
# confirm.
original <- filter(original, substr(Sample.ID, 14, 15) == "01")

# Recode "High Grade" as "G3". Vectorised with %in%, so missing grades and an
# empty table are handled without error (the former element-wise `if` loop
# stopped on NA and on zero rows).
is_high_grade <- original$Neoplasm.Histologic.Grade %in% "High Grade"
original$Neoplasm.Histologic.Grade[is_high_grade] <- "G3"

original <- filter(
  original,
  Neoplasm.Histologic.Type.Name == "Endometrioid endometrial adenocarcinoma"
)
original <- filter(
  original,
  Neoadjuvant.Therapy.Type.Administered.Prior.To.Resection.Text == "No"
)
table(original$Neoplasm.Histologic.Grade)

# Collapse stage sub-categories (IA, IB, ...) into their main stage.
original$stage <-
  original$Neoplasm.American.Joint.Committee.on.Cancer.Clinical.Group.Stage
stage_groups <- list(
  "Stage I" = c("Stage IA", "Stage IB", "Stage IC"),
  "Stage II" = c("Stage IIA", "Stage IIB"),
  "Stage III" = c(
    "Stage IIIA", "Stage IIIB", "Stage IIIC", "Stage IIIC1", "Stage IIIC2"
  ),
  "Stage IV" = c("Stage IVA", "Stage IVB")
)
for (main_stage in names(stage_groups)) {
  in_group <- original$stage %in% stage_groups[[main_stage]]
  original$stage[in_group] <- main_stage
}

# Keep the four-level stage, then reduce to a two-level variable.
# NOTE(review): every non-missing value other than "Stage I" becomes
# "advanced", including blank or unrecognised stage strings if the file
# contains any; check table(original$stage_copy).
original$stage_copy <- original$stage
original$stage <- ifelse(original$stage == "Stage I", "local", "advanced")

# Subtype annotations.
# Tsv file comes from Xenabrowser: TCGA Pan-Cancer dataset.
#https://xenabrowser.net/datapages/?dataset=TCGASubtype.20170308.tsv&host=https%3A%2F%2Fpancanatlas.xenahubs.net&removeHub=https%3A%2F%2Fxena.treehouse.gi.ucsc.edu%3A443
subtype_df <- read.delim("TCGASubtype.20170308.tsv")
subtype_df[] <- lapply(subtype_df, as.character)
# Same "-" to "." conversion as applied to the clinical sample IDs.
subtype_df$sampleID <- gsub("-", ".", subtype_df$sampleID)
subtype_df <- filter(subtype_df, sampleID %in% original$Sample.ID)

# Keep clinical rows that have a subtype annotation and look the annotation up
# by sample ID. Neither table is sorted at this point, so copying the column
# by row position would silently mislabel samples whenever the two files list
# them in a different order.
original <- filter(original, Sample.ID %in% subtype_df$sampleID)
subtype_row <- match(original$Sample.ID, subtype_df$sampleID)
all(original$Sample.ID == subtype_df$sampleID[subtype_row])
original$Subtype_Selected <- subtype_df$Subtype_Selected[subtype_row]

# Subtype counts within each histologic grade. which() skips rows with a
# missing grade, matching what filter() does.
for (grade in c("G1", "G2", "G3")) {
  in_grade <- which(original$Neoplasm.Histologic.Grade == grade)
  print(table(original$Subtype_Selected[in_grade]))
}

# Keep the full annotated table; `original` is restricted to the samples with
# a prediction below, while `copy` feeds the grade comparison at the end.
copy <- original

# Per-sample predicted probabilities.
# NOTE(review): presumably the grade-2 samples only, judging by the file name;
# confirm.
g2 <- read.delim("G2_preds_with_mingenes.txt")
original <- filter(original, Sample.ID %in% g2$samples)
g2 <- filter(g2, samples %in% original$Sample.ID)
original <- arrange(original, Sample.ID)
g2 <- arrange(g2, samples)

# The probabilities are copied by row position, so stop unless both tables
# hold the same samples in the same order (previously only printed).
preds_aligned <- nrow(g2) == nrow(original) &&
  all(g2$samples == original$Sample.ID)
print(preds_aligned)
if (!preds_aligned) {
  stop("Prediction table and clinical table are not aligned by sample ID.",
       call. = FALSE)
}
original$pred_proba <- g2$pred_proba

# Probability cut-off separating the two risk labels.
# NOTE(review): how this value was derived is not shown in this file.
risk_cutoff <- 0.7420612247092881
original$label <- ifelse(
  original$pred_proba >= risk_cutoff, "high-risk", "low-risk"
)

# Subtype counts per risk label.
hr <- filter(original, label == "high-risk")
lr <- filter(original, label == "low-risk")
table(hr$Subtype_Selected)
table(lr$Subtype_Selected)

# Association between subtype and risk label among the samples with a
# prediction.
chisq.test(original$Subtype_Selected, original$label)
# Association between subtype and grade among all samples whose grade is not
# "G2".
copy2 <- filter(copy, Neoplasm.Histologic.Grade != "G2")
chisq.test(copy2$Subtype_Selected, copy2$Neoplasm.Histologic.Grade)
```