[c0f169]: / scripts / validate_bio_format_bkup.sh

Download this file

60 lines (48 with data), 1.4 kB

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#!/bin/bash
#
# Validate every *.bio file in a directory against a two-column BIO
# (Begin / Inside / Outside) tagging layout.
#
# Usage:
#   validate_bio_format_bkup.sh <directory>
#
# Rules applied to every non-blank line:
#   1. It holds exactly two whitespace-separated columns.
#   2. The second column matches ^(B|I|O)(-[A-Za-z_]+)?$.
#   3. An I tag directly continues a B or I tag carrying the same type
#      suffix. Blank lines and lines rejected by rules 1-3 do not change
#      what counts as the "previous" tag.
#
# Output (stdout): one "Error: ..." line per violation, and one
#   "<file> contains valid BIO format." line per file WITHOUT violations.
# Exit status: 0 when every file is valid; 1 when the directory is missing,
#   a glob match is not a regular file (this includes "no *.bio files"),
#   or at least one file has a violation.

#######################################
# Validate a single BIO file, reporting each violation on stdout.
# Arguments: $1 - path of the file to check
# Outputs:   "Error: ..." lines, one per offending line
# Returns:   0 if no violation was found, 1 otherwise
#######################################
validate_bio_file() {
  local file=$1
  local -r tag_re='^(B|I|O)(-[A-Za-z_]+)?$'
  local line tag tag_type
  local -a columns
  local in_entity=0   # 1 while the last accepted tag was B or I
  local open_type=""  # type suffix of the entity that is currently open
  local status=0

  # Without this check a failed redirect would skip the loop entirely and
  # the file would be reported as valid.
  if [[ ! -r "$file" ]]; then
    printf '%s\n' "Error: Cannot read file $file"
    return 1
  fi

  # Loop through each line in the file.
  # `|| [[ -n "$line" ]]` keeps a final line that lacks a trailing newline.
  while read -r line || [[ -n "$line" ]]; do
    # Skip empty lines
    # NOTE(review): blank lines do not reset the tag state, so an I tag may
    # continue an entity across a blank line -- confirm this is intended.
    if [[ "$line" =~ ^[[:space:]]*$ ]]; then
      continue
    fi

    # Split the line into columns. `read -a` splits on IFS like an unquoted
    # expansion would, but without pathname expansion, so tokens such as
    # "*" or "?" are kept literally instead of turning into file names.
    read -r -a columns <<< "$line"

    # Check if the line contains exactly two columns
    if (( ${#columns[@]} != 2 )); then
      printf '%s\n' "Error: Invalid number of columns on line: $line in file $file"
      status=1
      continue
    fi

    # Check if the second column contains a valid BIO tag
    tag=${columns[1]}
    if [[ ! "$tag" =~ $tag_re ]]; then
      printf '%s\n' "Error: Invalid BIO tag on line: $line in file $file"
      status=1
      continue
    fi

    # Check if the tag sequence is valid. "Is an entity open?" is tracked
    # separately from its type so that untyped tags (bare B followed by
    # bare I) are accepted, and so that an O tag always closes the entity.
    tag_type=${tag:2}
    case "${tag:0:1}" in
      I)
        if (( ! in_entity )) || [[ "$tag_type" != "$open_type" ]]; then
          printf '%s\n' "Error: Invalid tag sequence on line: $line in file $file"
          status=1
          continue
        fi
        ;;
      B)
        in_entity=1
        open_type=$tag_type
        ;;
      *)
        # O tag: nothing is open any more.
        in_entity=0
        open_type=""
        ;;
    esac
  done < "$file"

  return "$status"
}

#######################################
# Entry point: validate all *.bio files found directly in a directory.
# Arguments: $1 - directory to scan
# Outputs:   violation lines and per-file success lines on stdout
# Returns:   0 if every file is valid, 1 if any file has a violation;
#            exits 1 immediately on a missing directory or non-file match
#######################################
main() {
  local dir=${1-}
  local file
  local status=0

  # Check if the input directory exists
  if [[ ! -d "$dir" ]]; then
    printf '%s\n' "Error: Directory not found!"
    exit 1
  fi

  # Loop through each .bio file in the input directory
  for file in "$dir"/*.bio; do
    # An unmatched glob is left as the literal pattern, so this check also
    # covers the "directory has no .bio files" case.
    if [[ ! -f "$file" ]]; then
      printf '%s\n' "Error: File not found!"
      exit 1
    fi

    # Only claim validity when the file produced no error at all.
    if validate_bio_file "$file"; then
      printf '%s\n' "$file contains valid BIO format."
    else
      status=1
    fi
  done

  return "$status"
}

main "$@"