# b_DownstreamAnalysisScript/downloadData.R
1
2
# MESSAGE -----------------------------------------------------------------
3
# 
4
# author: Yulin Lyu
5
# email: lvyulin@pku.edu.cn
6
# 
7
# require: R (any version); packages tidyverse, magrittr, glue
8
# 
9
# ---
10
11
# * Load packages ---------------------------------------------------------
12
13
library(tidyverse)
14
library(magrittr)
15
library(glue)
16
17
# * From GEO --------------------------------------------------------------
18
19
# Pick exactly ONE dataset per run. Relative setwd() calls chained one after
# another only work for the first dataset (the second path is then resolved
# inside the first dataset's folder), so the target directory is built from
# the absolute download root instead.
download_root <- "/mnt/f" # download dir
data_type <- "RNA" # set to "ATAC" to fetch the ATAC-seq dataset instead

# GEO series, SRA project and BioProject accessions for each dataset.
datasets <- list(
  RNA = list(geo = "GSE147839", sra = "SRP254790", bioProj = "PRJNA622253"),
  ATAC = list(geo = "GSE157237", sra = "SRP279550", bioProj = "PRJNA660602")
)
stopifnot(data_type %in% names(datasets))

geo <- datasets[[data_type]]$geo
sra <- datasets[[data_type]]$sra
bioProj <- datasets[[data_type]]$bioProj

# Later steps write "sra/", "fastq/" and the *.sh scripts relative to this
# directory; setwd() errors if it does not exist yet.
setwd(file.path(download_root, "exampleData", data_type, "raw"))
32
33
# download from SRA directly using wget (slow)
34
35
# Folder that will receive the downloaded .sra files and wget logs.
dir.create("sra")

# Fetch the run-info table of the selected SRA project and keep a copy as
# <working dir>/<sra>.csv, then load it.
run_info_url <- glue("http://trace.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?save=efetch&db=sra&rettype=runinfo&term={sra}")
run_info_csv <- glue("{where}/{sra}.csv", where = getwd())

download.file(url = run_info_url, destfile = run_info_csv)

projMeta <- read_csv(run_info_csv)
42
43
# Run accessions and their download URLs, one entry per row of the run-info
# table.
srr <- projMeta$Run
downPath <- projMeta$download_path

# One wget call per run: -b run in background, -c resume a partial download,
# -o write the log to sra/<run>.log, -O save the file as sra/<run>.sra.
down_cmd <- glue("wget -b -c -o sra/{srr}.log -O sra/{srr}.sra {downPath}")

# Write the shebang (followed by a blank line) and then one command per line.
# writeLines() emits the same bytes as the former write.table() call without
# relying on the reassignable `F` shorthand for its logical arguments.
writeLines(c("#!/bin/bash\n", down_cmd), "down.sh")
49
50
# download from ENA using aspera (fast)
51
# NOTE: NOT all datasets in SRA are accessible in ENA
52
53
# <todo> I will complete this part when needed in the future.
54
55
56
# * From ArrayExpress -----------------------------------------------------
57
58
# <todo> I will complete this part when needed in the future.
59
60
61
# * Extract fastq ---------------------------------------------------------
62
63
# Output folder for the extracted fastq files.
dir.create("fastq")

# One fasterq-dump call per run, reading sra/<run>.sra and writing into fastq/
# (-O). NOTE(review): -e 10 is presumably the thread count and -3 the split-3
# mode; confirm against the installed sra-tools version.
ext_cmd <- glue("fasterq-dump -e 10 -3 -O fastq sra/{srr}.sra")

# Write the shebang (followed by a blank line) and then one command per line.
# writeLines() emits the same bytes as the former write.table() call without
# relying on the reassignable `F` shorthand for its logical arguments.
writeLines(c("#!/bin/bash\n", ext_cmd), "ext.sh")
68
69
# Two gzip commands per run (read 1 and read 2), each on its own line. The
# trailing "&" sends every gzip job to the background, so all files are
# compressed concurrently when gzip.sh is executed.
gzip_cmd <- glue(
  "gzip fastq/{srr}.sra_1.fastq &
  gzip fastq/{srr}.sra_2.fastq &")

# Write the shebang (followed by a blank line) and then the commands.
# writeLines() emits the same bytes as the former write.table() call without
# relying on the reassignable `F` shorthand for its logical arguments.
writeLines(c("#!/bin/bash\n", gzip_cmd), "gzip.sh")
74