[5b4ecd]: / gap-replay / download.sh

Download this file

35 lines (27 with data), 988 Bytes

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
#!/bin/bash
# Entire data pipeline for the GAP-Replay dataset.
#
# Steps, in order:
#   1. Run the guidelines, PubMed and replay download scripts.
#   2. Call `pubmed/process.py --combine` to merge the per-source JSONL files
#      into gap_replay_train.jsonl and gap_replay_test.jsonl under DATA_PATH.
#
# All script paths are relative, so run this from the directory that contains
# the guidelines/, pubmed/ and replay/ folders.

# Abort on the first failing step so the combine stage never runs on missing
# or partially downloaded inputs.
set -euo pipefail

readonly DATA_PATH=/data/gap-replay

# Print all arguments joined into one comma-separated string (no trailing
# comma). This is the list format handed to --source_path below.
join_by_comma() {
  local IFS=,
  printf '%s' "$*"
}

echo "GAP-Replay path: $DATA_PATH"

echo "Downloading guidelines"
./guidelines/download.sh "$DATA_PATH"
echo "Done"

echo "Downloading Pubmed Papers + Abstracts"
./pubmed/download.sh "$DATA_PATH"
echo "Done"

echo "Downloading Replay corpus"
# NOTE(review): unlike the two downloaders above, this one is called without
# a path argument -- confirm it writes replay.jsonl into $DATA_PATH.
./replay/download.sh
echo "Done"

echo "Combining files into GAP-Replay (Train + Test)"

# Train split: PubMed papers + PubMed abstracts + guidelines + replay corpus.
train_sources=(
  "$DATA_PATH/s2orc-PubMed_processed_train.jsonl"
  "$DATA_PATH/abstracts-PubMed_processed_train.jsonl"
  "$DATA_PATH/guidelines_train.jsonl"
  "$DATA_PATH/replay.jsonl"
)

# Test split: same sources minus the replay corpus.
test_sources=(
  "$DATA_PATH/s2orc-PubMed_processed_test.jsonl"
  "$DATA_PATH/abstracts-PubMed_processed_test.jsonl"
  "$DATA_PATH/guidelines_test.jsonl"
)

# The source list is built with join_by_comma instead of backslash-continued
# lines: a continuation with no whitespace around it glues the last path to
# the following "--save_path" token, so the option is never seen.
python pubmed/process.py \
  --combine \
  --source_path "$(join_by_comma "${train_sources[@]}")" \
  --save_path "$DATA_PATH/gap_replay_train.jsonl"

python pubmed/process.py \
  --combine \
  --source_path "$(join_by_comma "${test_sources[@]}")" \
  --save_path "$DATA_PATH/gap_replay_test.jsonl"