|
a |
|
b/b_DownstreamAnalysisScript/downloadData.R |
|
|
1 |
|
|
|
2 |
# MESSAGE ----------------------------------------------------------------- |
|
|
3 |
# |
|
|
4 |
# author: Yulin Lyu |
|
|
5 |
# email: lvyulin@pku.edu.cn |
|
|
6 |
# |
|
|
7 |
# require: R whatever |
|
|
8 |
# |
|
|
9 |
# --- |
|
|
10 |
|
|
|
11 |
# * Load packages --------------------------------------------------------- |
|
|
12 |
|
|
|
13 |
library(tidyverse) |
|
|
14 |
library(magrittr) |
|
|
15 |
library(glue) |
|
|
16 |
|
|
|
17 |
# * From GEO -------------------------------------------------------------- |
|
|
18 |
|
|
|
19 |
# Choose which example dataset this run downloads: "RNA" or "ATAC".
# Only one dataset is handled per run; switch `assay` and re-run for the other.
assay <- "RNA"

# Per-dataset settings: raw-data directory (relative to the download dir) and
# the accessions assigned to `geo`, `sra` and `bioProj` below.
datasets <- list(
  RNA = list(
    rawDir = "exampleData/RNA/raw",
    geo = "GSE147839",
    sra = "SRP254790",
    bioProj = "PRJNA622253"
  ),
  ATAC = list(
    rawDir = "exampleData/ATAC/raw",
    geo = "GSE157237",
    sra = "SRP279550",
    bioProj = "PRJNA660602"
  )
)

if (!assay %in% names(datasets)) {
  stop(
    "Unknown assay '", assay, "'; expected one of: ",
    paste(names(datasets), collapse = ", "),
    call. = FALSE
  )
}

dataset <- datasets[[assay]]

# A single setwd() on the full path: chaining relative setwd() calls for both
# datasets would resolve the second one inside the first dataset's directory.
setwd(file.path("/mnt/f", dataset$rawDir)) # download dir

geo <- dataset$geo
sra <- dataset$sra
bioProj <- dataset$bioProj
|
|
32 |
|
|
|
33 |
# download from SRA directly using wget (slow)

# showWarnings = FALSE keeps re-runs quiet when the directory already exists.
dir.create("sra", showWarnings = FALSE)

# Run-info table for the SRA accession in `sra`, stored in the working dir.
runInfoFile <- file.path(getwd(), paste0(sra, ".csv"))

download.file(
  glue("https://trace.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?save=efetch&db=sra&rettype=runinfo&term={sra}"),
  runInfoFile)

projMeta <- read_csv(runInfoFile)

# Fail early instead of silently writing a script with no (or broken) commands.
requiredCols <- c("Run", "download_path")
missingCols <- setdiff(requiredCols, colnames(projMeta))
if (length(missingCols) > 0) {
  stop(
    "Run info for ", sra, " lacks column(s): ",
    paste(missingCols, collapse = ", "),
    call. = FALSE
  )
}
if (nrow(projMeta) == 0) {
  stop("Run info for ", sra, " contains no runs", call. = FALSE)
}

srr <- projMeta$Run
downPath <- projMeta$download_path

# One wget call per run (glue is vectorised over `srr` and `downPath`):
# -b run in background, -c resume partial files, -o log file, -O output file.
down_cmd <- glue("wget -b -c -o sra/{srr}.log -O sra/{srr}.sra {downPath}")

# Shebang, a blank line, then one command per line.
writeLines(c("#!/bin/bash", "", down_cmd), "down.sh")
|
|
49 |
|
|
|
50 |
# download from ENA using aspera (fast) |
|
|
51 |
# NOTE: NOT all datasets in SRA are accessible in ENA |
|
|
52 |
|
|
|
53 |
# <todo> I will complete this part when needed in the future. |
|
|
54 |
|
|
|
55 |
|
|
|
56 |
# * From ArrayExpress ----------------------------------------------------- |
|
|
57 |
|
|
|
58 |
# <todo> I will complete this part when needed in the future. |
|
|
59 |
|
|
|
60 |
|
|
|
61 |
# * Extract fastq --------------------------------------------------------- |
|
|
62 |
|
|
|
63 |
# showWarnings = FALSE keeps re-runs quiet when the directory already exists.
dir.create("fastq", showWarnings = FALSE)

# Thread count handed to fasterq-dump through its -e option.
threads <- 10

# One fasterq-dump call per run in `srr`, reading sra/<run>.sra and writing
# into fastq/.
ext_cmd <- glue("fasterq-dump -e {threads} -3 -O fastq sra/{srr}.sra")

# Shebang, a blank line, then one command per line.
writeLines(c("#!/bin/bash", "", ext_cmd), "ext.sh")

# Two background gzip jobs per run, one for each mate file.
# NOTE(review): assumes every run yields both <run>.sra_1.fastq and
# <run>.sra_2.fastq (paired-end layout, this fasterq-dump naming) - confirm.
# The explicit "\n" keeps the generated text independent of glue's
# whitespace trimming of multi-line literals.
gzip_cmd <- glue(
  "gzip fastq/{srr}.sra_1.fastq &\n",
  "gzip fastq/{srr}.sra_2.fastq &")

writeLines(c("#!/bin/bash", "", gzip_cmd), "gzip.sh")
|
|
74 |
|