|
a |
|
b/a_PreprocessPipeline/bulkATACpre.R |
|
|
1 |
|
|
|
2 |
# MESSAGE ----------------------------------------------------------------- |
|
|
3 |
# |
|
|
4 |
# author: Yulin Lyu |
|
|
5 |
# email: lvyulin@pku.edu.cn |
|
|
6 |
# |
|
|
7 |
# require: R whatever |
|
|
8 |
# |
|
|
9 |
# --- |
|
|
10 |
|
|
|
11 |
# 0. Load packages -------------------------------------------------------- |
|
|
12 |
|
|
|
13 |
library(jsonlite) |
|
|
14 |
library(tidyverse) |
|
|
15 |
library(magrittr) |
|
|
16 |
library(glue) |
|
|
17 |
|
|
|
18 |
setwd("/mnt/d/proj/github/Protocols-4pub") |
|
|
19 |
source("a_PreprocessPipeline/utils.R") |
|
|
20 |
|
|
|
21 |
settingList <- fromJSON("a_PreprocessPipeline/bulkATACpre_conf_X.json") |
|
|
22 |
cmdConf <- fromJSON("a_PreprocessPipeline/cmd_conf_X.json") |
|
|
23 |
|
|
|
24 |
setwd(settingList$workDir) |
|
|
25 |
|
|
|
26 |
# A. Rename --------------------------------------------------------------- |
|
|
27 |
|
|
|
28 |
setwd("0_fastq") |
|
|
29 |
|
|
|
30 |
list.files(".") |
|
|
31 |
|
|
|
32 |
from_file <- list.files(".", "") |
|
|
33 |
to_file <- gsub("", "", from_file) |
|
|
34 |
|
|
|
35 |
file.rename(from_file, to_file) |
|
|
36 |
|
|
|
37 |
setwd("..") |
|
|
38 |
|
|
|
39 |
# B. Load data ------------------------------------------------------------ |
|
|
40 |
|
|
|
41 |
file_name <- list.files("0_fastq", ".gz") |
|
|
42 |
file_name <- list.files("2_trim", ".gz") # after qc |
|
|
43 |
|
|
|
44 |
R1 <- grep("_1\\.", file_name, value = T) |
|
|
45 |
R2 <- grep("_2\\.", file_name, value = T) |
|
|
46 |
|
|
|
47 |
sample_name <- gsub("_1\\..*", "", R1) |
|
|
48 |
|
|
|
49 |
# 1. QC for raw data ------------------------------------------------------ |
|
|
50 |
|
|
|
51 |
dir.create("code") |
|
|
52 |
|
|
|
53 |
qc_dir <- "1_qc" |
|
|
54 |
qc_dir %>% dir.create() |
|
|
55 |
qc_dir %>% str_c("code/", .) %>% dir.create() |
|
|
56 |
|
|
|
57 |
qc <- settingList$fastqcDir |
|
|
58 |
|
|
|
59 |
qc_cmd <- glue("{qc} -o {qc_dir} -t 48 0_fastq/{file_name}") |
|
|
60 |
cat(qc_cmd[1]) |
|
|
61 |
|
|
|
62 |
setCMD(qc_cmd, str_c("code/", qc_dir), 6) |
|
|
63 |
# multiqc -o 1_qc -f -n qc.raw 1_qc/*.zip |
|
|
64 |
|
|
|
65 |
# 2. Trim ----------------------------------------------------------------- |
|
|
66 |
|
|
|
67 |
trim_dir <- "2_trim" |
|
|
68 |
trim_dir %>% dir.create() |
|
|
69 |
trim_dir %>% str_c("code/", .) %>% dir.create() |
|
|
70 |
dir.create(".2_untrim") |
|
|
71 |
|
|
|
72 |
trim <- settingList$trimmomaticDir |
|
|
73 |
adapter <- settingList$trimAdapter |
|
|
74 |
|
|
|
75 |
trim_cmd <- glue( |
|
|
76 |
"java -jar {trim} PE -threads 48 \\ |
|
|
77 |
0_fastq/{R1} 0_fastq/{R2} \\ |
|
|
78 |
2_trim/{sample_name}_1.trim.fastq.gz \\ |
|
|
79 |
.2_untrim/{sample_name}_1.untrim.fastq.gz \\ |
|
|
80 |
2_trim/{sample_name}_2.trim.fastq.gz \\ |
|
|
81 |
.2_untrim/{sample_name}_2.untrim.fastq.gz \\ |
|
|
82 |
ILLUMINACLIP:{adapter}:2:30:7:1:true \\ |
|
|
83 |
LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36 > \\ |
|
|
84 |
2_trim/{sample_name}.trim.log 2>&1" |
|
|
85 |
) |
|
|
86 |
cat(trim_cmd[1]) |
|
|
87 |
|
|
|
88 |
setCMD(trim_cmd, str_c("code/", trim_dir), 6) |
|
|
89 |
# multiqc -o 2_trim -f -n trim 2_trim/*.log |
|
|
90 |
|
|
|
91 |
# 3. QC for trimmed data -------------------------------------------------- |
|
|
92 |
|
|
|
93 |
# reload trimmed data first, or |
|
|
94 |
file_name <- glue("{rep(sample_name, each = 2)}_{rep(1:2, length(sample_name))}.trim.fastq.gz") |
|
|
95 |
R1 <- grep("_1\\.", file_name, value = T) |
|
|
96 |
R2 <- grep("_2\\.", file_name, value = T) |
|
|
97 |
|
|
|
98 |
qc_dir <- "3_qc" |
|
|
99 |
qc_dir %>% dir.create() |
|
|
100 |
qc_dir %>% str_c("code/", .) %>% dir.create() |
|
|
101 |
|
|
|
102 |
qc <- settingList$fastqcDir |
|
|
103 |
|
|
|
104 |
qc_cmd <- glue("{qc} -o {qc_dir} -t 48 {trim_dir}/{file_name}") |
|
|
105 |
cat(qc_cmd[1]) |
|
|
106 |
|
|
|
107 |
setCMD(qc_cmd, str_c("code/", qc_dir), 6) |
|
|
108 |
# multiqc -o 3_qc -f -n qc.trim 3_qc/*.zip |
|
|
109 |
|
|
|
110 |
# 4. Map ------------------------------------------------------------------ |
|
|
111 |
|
|
|
112 |
map_dir <- "4_map" |
|
|
113 |
map_dir %T>% dir.create() %>% str_c("code/", .) %>% dir.create() |
|
|
114 |
|
|
|
115 |
bt2 <- settingList$bt2Dir |
|
|
116 |
ref <- settingList$mapRef |
|
|
117 |
|
|
|
118 |
map_cmd <- glue( |
|
|
119 |
"{bt2} -p 48 --very-sensitive -X 2000 -x {ref} \\ |
|
|
120 |
-1 {trim_dir}/{R1} -2 {trim_dir}/{R2} \\ |
|
|
121 |
-S {map_dir}/{sample_name}.sam > \\ |
|
|
122 |
{map_dir}/{sample_name}.log 2>&1" |
|
|
123 |
) |
|
|
124 |
cat(map_cmd[1]) |
|
|
125 |
|
|
|
126 |
setCMD(map_cmd, str_c("code/", map_dir), 6) |
|
|
127 |
# multiqc -o 4_map -f -n map 4_map/*.log |
|
|
128 |
|
|
|
129 |
# 5. Sort ----------------------------------------------------------------- |
|
|
130 |
|
|
|
131 |
sort_dir <- "5_sort" |
|
|
132 |
sort_dir %T>% dir.create() %>% str_c("code/", .) %>% dir.create() |
|
|
133 |
|
|
|
134 |
samt <- settingList$samtDir |
|
|
135 |
|
|
|
136 |
sort_cmd <- glue( |
|
|
137 |
"{samt} view -@ 24 -bS {map_dir}/{sample_name}.sam | \\ |
|
|
138 |
{samt} sort -@ 24 > \\ |
|
|
139 |
{sort_dir}/{sample_name}.bam" |
|
|
140 |
) |
|
|
141 |
cat(sort_cmd[1]) |
|
|
142 |
|
|
|
143 |
setCMD(sort_cmd, str_c("code/", sort_dir), 6) |
|
|
144 |
|
|
|
145 |
# 6. Dedup ---------------------------------------------------------------- |
|
|
146 |
|
|
|
147 |
dedup_dir <- "6_dedup" |
|
|
148 |
dedup_dir %T>% dir.create() %>% str_c("code/", .) %>% dir.create() |
|
|
149 |
|
|
|
150 |
picard <- settingList$picardDir |
|
|
151 |
|
|
|
152 |
dedup_cmd <- glue( |
|
|
153 |
"java -Xms16g -Xmx256g -jar {picard} MarkDuplicates \\ |
|
|
154 |
I={sort_dir}/{sample_name}.bam \\ |
|
|
155 |
O={dedup_dir}/{sample_name}.dedup.bam \\ |
|
|
156 |
M={dedup_dir}/{sample_name}.dedup.txt \\ |
|
|
157 |
REMOVE_DUPLICATES=true > \\ |
|
|
158 |
{dedup_dir}/{sample_name}.dedup.log 2>&1" |
|
|
159 |
) |
|
|
160 |
cat(dedup_cmd[1]) |
|
|
161 |
|
|
|
162 |
setCMD(dedup_cmd, str_c("code/", dedup_dir), 6) |
|
|
163 |
|
|
|
164 |
# 7. Filter --------------------------------------------------------------- |
|
|
165 |
|
|
|
166 |
fil_dir <- "7_filter" |
|
|
167 |
fil_dir %T>% dir.create() %>% str_c("code/", .) %>% dir.create() |
|
|
168 |
|
|
|
169 |
samt <- settingList$samtDir |
|
|
170 |
|
|
|
171 |
fil_cmd <- glue( |
|
|
172 |
"{samt} view -@ 20 -h -f 2 -q 30 {dedup_dir}/{sample_name}.dedup.bam | \\ |
|
|
173 |
egrep -v '\\bMT|\\bGL|\\bJH' | \\ |
|
|
174 |
{samt} sort -@ 20 -O bam > \\ |
|
|
175 |
{fil_dir}/{sample_name}.filter.bam" |
|
|
176 |
) |
|
|
177 |
cat(fil_cmd[1]) |
|
|
178 |
|
|
|
179 |
setCMD(fil_cmd, str_c("code/", fil_dir), 6) |
|
|
180 |
|
|
|
181 |
# 8. Insert --------------------------------------------------------------- |
|
|
182 |
|
|
|
183 |
ins_dir <- "8_insert" |
|
|
184 |
ins_dir %T>% dir.create() %>% str_c("code/", .) %>% dir.create() |
|
|
185 |
|
|
|
186 |
picard <- settingList$picardDir |
|
|
187 |
|
|
|
188 |
ins_cmd <- glue( |
|
|
189 |
"java -Xms16g -Xmx256g -jar {picard} CollectInsertSizeMetrics \\ |
|
|
190 |
I={fil_dir}/{sample_name}.filter.bam \\ |
|
|
191 |
O={ins_dir}/{sample_name}.txt \\ |
|
|
192 |
H={ins_dir}/{sample_name}.pdf > \\ |
|
|
193 |
{ins_dir}/{sample_name}.log 2>&1" |
|
|
194 |
) |
|
|
195 |
cat(ins_cmd[1]) |
|
|
196 |
|
|
|
197 |
setCMD(ins_cmd, str_c("code/", ins_dir), 6) |
|
|
198 |
|
|
|
199 |
# 9. Shift ---------------------------------------------------------------- |
|
|
200 |
|
|
|
201 |
shift_dir <- "9_shift" |
|
|
202 |
shift_dir %T>% dir.create() %>% str_c("code/", .) %>% dir.create() |
|
|
203 |
|
|
|
204 |
bedt <- settingList$bedtDir |
|
|
205 |
|
|
|
206 |
shift_cmd <- glue( |
|
|
207 |
"{bedt} bamtobed -i {fil_dir}/{sample_name}.filter.bam | \\ |
|
|
208 |
{awk_cmd} \\ |
|
|
209 |
> {shift_dir}/{sample_name}.shift.bed", |
|
|
210 |
awk_cmd = "awk -F '\\t' 'BEGIN {OFS = FS}{ if ($6 == \"+\") {$2 = $2 + 4} else if ($6 == \"-\") {$3 = $3 - 5} print $0}'" |
|
|
211 |
) |
|
|
212 |
cat(shift_cmd[1]) |
|
|
213 |
|
|
|
214 |
setCMD(shift_cmd, str_c("code/", shift_dir), 6) |
|
|
215 |
|
|
|
216 |
# 10. Peak ---------------------------------------------------------------- |
|
|
217 |
|
|
|
218 |
macs2 <- settingList$macs2Dir |
|
|
219 |
|
|
|
220 |
peak_dir <- "10_peak" |
|
|
221 |
peak_dir %>% dir.create() |
|
|
222 |
|
|
|
223 |
org <- "hs" |
|
|
224 |
org <- "mm" |
|
|
225 |
|
|
|
226 |
peak_cmd <- glue(" |
|
|
227 |
{macs2} callpeak -t {shift_dir}/{sample_name}.shift.bed \\ |
|
|
228 |
-g {org} --nomodel --shift -100 --extsize 200 --keep-dup all -n {sample_name} \\ |
|
|
229 |
-B --outdir {peak_dir} > {peak_dir}/{sample_name}.log 2>&1 &") |
|
|
230 |
cat(peak_cmd[1]) |
|
|
231 |
|
|
|
232 |
group_name <- tapply(sample_name, str_sub(sample_name, 1, -3), c) |
|
|
233 |
|
|
|
234 |
group_peak_cmd <- glue( |
|
|
235 |
"{macs2} callpeak -t {peak_group} \\ |
|
|
236 |
-g {org} --nomodel --shift -100 --extsize 200 --keep-dup all --SPMR -n group_{group} \\ |
|
|
237 |
-B --outdir {peak_dir} > {peak_dir}/group_{group}.log 2>&1 &", |
|
|
238 |
peak_group = map_chr(group_name, ~ paste(glue("{shift_dir}/{.x}.shift.bed"), collapse = " ")), |
|
|
239 |
group = names(group_name)) |
|
|
240 |
cat(group_peak_cmd[1]) |
|
|
241 |
|
|
|
242 |
write.table(c("#!/bin/bash\n", peak_cmd), glue("code/{peak_dir}.sh"), quote = F, row.names = F, col.names = F) |
|
|
243 |
write.table(c("#!/bin/bash\n", group_peak_cmd), glue("code/{peak_dir}_group.sh"), quote = F, row.names = F, col.names = F) |
|
|
244 |
|
|
|
245 |
# * * 2.11. Bigwig -------------------------------------------------------- |
|
|
246 |
|
|
|
247 |
bw_dir <- "11_bw" |
|
|
248 |
bw_dir %>% dir.create() |
|
|
249 |
|
|
|
250 |
dt <- settingList$dtDir |
|
|
251 |
bdg2bw <- settingList$bdg2bwDir |
|
|
252 |
ref_len <- settingList$refLen |
|
|
253 |
|
|
|
254 |
index_cmd <- glue("{st} index {fil_dir}/{sample_name}.filter.bam &") |
|
|
255 |
cat(index_cmd[1]) |
|
|
256 |
|
|
|
257 |
bw_cmd <- glue(" |
|
|
258 |
{dt}bamCoverage --normalizeUsing RPKM -of bigwig -b {fil_dir}/{sample_name}.filter.bam \\ |
|
|
259 |
-o {bw_dir}/{sample_name}.bw &") |
|
|
260 |
cat(bw_cmd[1]) |
|
|
261 |
|
|
|
262 |
group_bw_cmd <- glue( |
|
|
263 |
"{bdg2bw} {peak_dir}/group_{group}_treat_pileup.bdg {ref_len} {bw_dir}/{group}.bw &", |
|
|
264 |
group = names(group_name)) |
|
|
265 |
cat(group_bw_cmd[1]) |
|
|
266 |
|
|
|
267 |
sample_name <- list.files(bw_dir, "-[0-9].bw") %>% str_replace(".bw$", "") |
|
|
268 |
group_name <- tapply(sample_name, str_sub(sample_name, 1, -3), c) |
|
|
269 |
|
|
|
270 |
cor_cmd <- glue( |
|
|
271 |
"{dt}multiBigwigSummary bins -p 20 -b {bws} -o {bw_dir}/bw_cor.npz &", |
|
|
272 |
bws = str_c(glue("{bw_dir}/{sample_name}.bw"), collapse = " \\\n")) |
|
|
273 |
cat(cor_cmd) |
|
|
274 |
|
|
|
275 |
group_cor_cmd <- glue( |
|
|
276 |
"{dt}multiBigwigSummary bins -p 20 -b {group_bws} -o {bw_dir}/group_bw_cor.npz &", |
|
|
277 |
group_bws = str_c(glue("{bw_dir}/{names(group_name)}.bw"), collapse = " \\\n")) |
|
|
278 |
cat(group_cor_cmd) |
|
|
279 |
|
|
|
280 |
corplot_cmd <- glue( |
|
|
281 |
"{dt}plotCorrelation -in {bw_dir}/bw_cor.npz -c pearson -p heatmap --plotNumbers -o {bw_dir}/cor_heatmap.pdf --colorMap RdBu_r &") |
|
|
282 |
cat(corplot_cmd) |
|
|
283 |
|
|
|
284 |
group_corplot_cmd <- glue( |
|
|
285 |
"{dt}plotCorrelation -in {bw_dir}/group_bw_cor.npz -c pearson -p heatmap --plotNumbers -o {bw_dir}/group_cor_heatmap.pdf --colorMap RdBu_r &") |
|
|
286 |
cat(group_corplot_cmd) |
|
|
287 |
|
|
|
288 |
write.table(c("#!/bin/bash\n", index_cmd), glue("code/{bw_dir}_index.sh"), quote = F, row.names = F, col.names = F) |
|
|
289 |
write.table(c("#!/bin/bash\n", bw_cmd), glue("code/{bw_dir}.sh"), quote = F, row.names = F, col.names = F) |
|
|
290 |
write.table(c("#!/bin/bash\n", group_bw_cmd), glue("code/{bw_dir}_group.sh"), quote = F, row.names = F, col.names = F) |
|
|
291 |
|
|
|
292 |
write.table(c("#!/bin/bash\n", cor_cmd), glue("code/{bw_dir}_cor.sh"), quote = F, row.names = F, col.names = F) |
|
|
293 |
write.table(c("#!/bin/bash\n", group_cor_cmd), glue("code/{bw_dir}_cor_group.sh"), quote = F, row.names = F, col.names = F) |
|
|
294 |
write.table(c("#!/bin/bash\n", corplot_cmd), glue("code/{bw_dir}_plot_cor.sh"), quote = F, row.names = F, col.names = F) |
|
|
295 |
write.table(c("#!/bin/bash\n", group_corplot_cmd), glue("code/{bw_dir}_plot_cor_group.sh"), quote = F, row.names = F, col.names = F) |
|
|
296 |
|
|
|
297 |
# * * 2.12. IDR ----------------------------------------------------------- |
|
|
298 |
|
|
|
299 |
idr_dir <- "12_idr" |
|
|
300 |
idr_dir %>% dir.create() |
|
|
301 |
|
|
|
302 |
idr <- settingList$idrDir |
|
|
303 |
|
|
|
304 |
idr_cmd <- map(group_name, ~ as_tibble(combn(.x, 2)) %>% map(~ paste(glue("{peak_dir}/{.x}_peaks.narrowPeak"), collapse = " "))) %>% |
|
|
305 |
imap(~ glue("{idr} --samples {.x} --peak-list {peak_dir}/group_{.y}_peaks.narrowPeak \\ |
|
|
306 |
--input-file-type narrowPeak --output-file {idr_dir}/{.y}")) %>% |
|
|
307 |
map(~ imap(.x, ~ glue("{.x}-{.y}.idr.narrowPeak &"))) %>% unlist() %>% unname() |
|
|
308 |
cat(idr_cmd[1]) |
|
|
309 |
|
|
|
310 |
write.table(c("#!/bin/bash\n", idr_cmd), glue("code/{idr_dir}.sh"), quote = F, row.names = F, col.names = F) |
|
|
311 |
|
|
|
312 |
group_index <- map(group_name, ~ choose(length(.x), 2)) %>% imap(~ rep(.y, .x)) %>% unlist() %>% unname() |
|
|
313 |
idr_file <- list.files(idr_dir, "idr", full.names = T) |
|
|
314 |
idr_data <- map(idr_file, ~ read_delim(.x, "\t", col_names = F)) |
|
|
315 |
idr_peak <- map(idr_data, ~ .x[.x[[5]] > 540, ]) %>% |
|
|
316 |
map(~ str_c(.x[[1]], .x[[2]], .x[[3]], sep = "_")) %>% |
|
|
317 |
tapply(group_index, c) %>% |
|
|
318 |
map(~ unlist(.x) %>% unique) |
|
|
319 |
|
|
|
320 |
group_file <- list.files(peak_dir, "^group.*Peak", full.names = T) |
|
|
321 |
group_data <- map(group_file, ~ read.table(.x, sep = "\t", stringsAsFactors = F)) |
|
|
322 |
group_peak <- map(group_data, ~ str_c(.x[[1]], .x[[2]], .x[[3]], sep = "_")) |
|
|
323 |
|
|
|
324 |
groupData <- pmap(list(idr_peak, group_peak, group_data), function(a, b, c) {c[b %in% a, ]}) |
|
|
325 |
|
|
|
326 |
iwalk(group_data, ~ write.table(.x, glue("{idr_dir}/{.y}.narrowPeak"), col.names = F, row.names = F, quote = F, sep = "\t")) |
|
|
327 |
|
|
|
328 |
# * * 2.13. Motif --------------------------------------------------------- |
|
|
329 |
|
|
|
330 |
motif_dir <- "13_motif" |
|
|
331 |
motif_dir %>% dir.create() |
|
|
332 |
region_file <- list.files(motif_dir, "bed") |
|
|
333 |
|
|
|
334 |
homer <- settingList$homerDir |
|
|
335 |
|
|
|
336 |
org <- "hg19" |
|
|
337 |
org <- "mm10" |
|
|
338 |
|
|
|
339 |
motif_cmd <- glue( |
|
|
340 |
"{homer}findMotifsGenome.pl {motif_dir}/{region_file} {org} {motif_dir}/{rigion_name} \\ |
|
|
341 |
-size 200 -len 8,10,12 > {region_file}/{region_name}.log 2>&1 &", |
|
|
342 |
region_name = str_replace(region_file, ".bed", "")) |
|
|
343 |
cat(motif_cmd[1]) |
|
|
344 |
|
|
|
345 |
write.table(c("#!/bin/bash\n", motif_cmd), glue("code/{motif_dir}.sh"), quote = F, row.names = F, col.names = F) |
|
|
346 |
|
|
|
347 |
# * * 2.14. Heat ---------------------------------------------------------- |
|
|
348 |
|
|
|
349 |
heat_dir <- "14_heat" |
|
|
350 |
heat_dir %>% dir.create() |
|
|
351 |
|
|
|
352 |
sample_name <- list.files(bw_dir, "-[123].bw") %>% str_replace(".bw$", "") |
|
|
353 |
group_name <- tapply(sample_name, str_sub(sample_name, 1, -3), c) |
|
|
354 |
|
|
|
355 |
# for global |
|
|
356 |
mat_cmd <- glue( |
|
|
357 |
"{dt}computeMatrix reference-point -a 2000 -b 2000 -p 20 -R {heat_dir}/hg19geneTSS.bed -S \\ |
|
|
358 |
{bws} -o {heat_dir}/TSS_mtx.gz &", |
|
|
359 |
bws = str_c(glue("{bw_dir}/{names(group_name)}.bw"), collapse = " \\\n")) |
|
|
360 |
cat(mat_cmd) |
|
|
361 |
|
|
|
362 |
heat_cmd <- glue( |
|
|
363 |
"{dt}plotHeatmap -m {heat_dir}/TSS_mtx.gz --colorMap RdBu_r -o {heat_dir}/TSS_heatmap.pdf \\ |
|
|
364 |
--outFileNameMatrix {heat_dir}/TSS_value.txt --outFileSortedRegions {heat_dir}/TSS_region.bed &") |
|
|
365 |
cat(heat_cmd) |
|
|
366 |
|
|
|
367 |
peak_group <- list.files(heat_dir, "peakGroup", full.names = T) |
|
|
368 |
|
|
|
369 |
# for peaks |
|
|
370 |
mat_cmd <- glue( |
|
|
371 |
"{dt}computeMatrix scale-regions -a 1000 -b 1000 -p 20 -R {peak} -S \\ |
|
|
372 |
{bws} -o {heat_dir}/peak_mtx.gz &", |
|
|
373 |
peak = str_c(peak_group, collapse = " "), |
|
|
374 |
bws = str_c(glue("{bw_dir}/{names(group_name)}.bw"), collapse = " \\\n")) |
|
|
375 |
cat(mat_cmd) |
|
|
376 |
|
|
|
377 |
heat_cmd <- glue( |
|
|
378 |
"{dt}plotHeatmap -m {heat_dir}/peak_mtx.gz --colorMap RdBu_r -o {heat_dir}/peak_heatmap.pdf \\ |
|
|
379 |
--outFileNameMatrix {heat_dir}/peak_value.txt --outFileSortedRegions {heat_dir}/peak_region.bed &") |
|
|
380 |
cat(heat_cmd) |
|
|
381 |
|
|
|
382 |
write.table(c("#!/bin/bash\n", mat_cmd), glue("code/{heat_dir}_mtx.sh"), quote = F, row.names = F, col.names = F) |
|
|
383 |
write.table(c("#!/bin/bash\n", heat_cmd), glue("code/{heat_dir}.sh"), quote = F, row.names = F, col.names = F) |
|
|
384 |
|
|
|
385 |
# * * 2.15. Anno ---------------------------------------------------------- |
|
|
386 |
|
|
|
387 |
anno_dir <- "15_anno" |
|
|
388 |
anno_dir %>% dir.create() |
|
|
389 |
|
|
|
390 |
known_motif <- settingList$knownMotif |
|
|
391 |
|
|
|
392 |
peak_file <- list.files(anno_dir, ".homer") |
|
|
393 |
|
|
|
394 |
anno_cmd <- glue( |
|
|
395 |
"{homer}annotatePeaks.pl {anno_dir}/{peak_file} {org} -m {known_motif} -mscore > {anno_dir}/{peak_name}.txt &", |
|
|
396 |
peak_name = str_remove(peak_file, ".homer") |
|
|
397 |
) |
|
|
398 |
cat(anno_cmd) |
|
|
399 |
|
|
|
400 |
write.table(c("#!/bin/bash\n", anno_cmd), glue("code/{anno_dir}.sh"), quote = F, row.names = F, col.names = F) |
|
|
401 |
|