|
a |
|
b/openomics_web/utils/str_utils.py |
|
|
1 |
COUNT = "_count" |
|
|
2 |
|
|
|
3 |
|
|
|
4 |
def make_trie(words): |
|
|
5 |
""" |
|
|
6 |
Args: |
|
|
7 |
words: |
|
|
8 |
""" |
|
|
9 |
root = {} |
|
|
10 |
for word in words: |
|
|
11 |
current_dict = root |
|
|
12 |
for letter in word: |
|
|
13 |
current_dict = current_dict.setdefault(letter, {}) |
|
|
14 |
|
|
|
15 |
if COUNT not in current_dict: |
|
|
16 |
current_dict[COUNT] = 1 |
|
|
17 |
else: |
|
|
18 |
current_dict[COUNT] += 1 |
|
|
19 |
return root |
|
|
20 |
|
|
|
21 |
|
|
|
22 |
def longest_common_prefix(strs): |
|
|
23 |
""" |
|
|
24 |
Args: |
|
|
25 |
strs: |
|
|
26 |
""" |
|
|
27 |
def traverse_trie(dictionary, prefix): |
|
|
28 |
if len(prefix) > 100: |
|
|
29 |
return prefix |
|
|
30 |
|
|
|
31 |
if len(dictionary.keys()) == 1 and COUNT not in dictionary.keys(): |
|
|
32 |
letter = list(dictionary.keys())[0] |
|
|
33 |
return traverse_trie(dictionary[letter], prefix + letter) |
|
|
34 |
else: |
|
|
35 |
return prefix |
|
|
36 |
|
|
|
37 |
trie = make_trie(strs) |
|
|
38 |
lcp_branches = [] |
|
|
39 |
for branch in trie: |
|
|
40 |
branch_lcp = traverse_trie(trie[branch], branch) |
|
|
41 |
lcp_branches.append(branch_lcp) |
|
|
42 |
|
|
|
43 |
return lcp_branches |