|
a |
|
b/modules/chatbot/preprocessor.py |
|
|
1 |
import re |
|
|
2 |
from modules.chatbot.const import CONTRACTIONS |
|
|
3 |
|
|
|
4 |
|
|
|
5 |
def decontracted(phrase):
    """
    Expand the contractions found in a phrase.

    Each key of ``CONTRACTIONS`` that occurs in the phrase is substituted,
    as a plain substring, by its mapped value. The substitutions are applied
    one after another, in the iteration order of the mapping, so a later
    substitution operates on the result of the earlier ones.

    Args:
        phrase (str): The input phrase.

    Returns:
        str: The phrase after every substitution has been applied.
    """
    expanded = phrase
    for contraction, expansion in CONTRACTIONS.items():
        expanded = expanded.replace(contraction, expansion)
    return expanded
|
|
18 |
|
|
|
19 |
|
|
|
20 |
def preprocess(text):
    """
    Normalize raw text through a fixed sequence of cleaning steps.

    The steps run in this order:
      1. Lowercase the text.
      2. Expand contractions via ``decontracted``.
      3. Delete the punctuation and symbol characters listed in the first
         pattern below.
      4. Replace zero-width spaces, non-breaking spaces and hyphens with a
         regular space.

    Args:
        text (str): The input text.

    Returns:
        str: The cleaned text.
    """
    # Lowercasing happens before contraction expansion, so the lookup in
    # ``decontracted`` only ever sees lowercase input.
    expanded = decontracted(text.lower())

    # Drop punctuation / symbol characters entirely (no replacement).
    without_symbols = re.sub(r"[$)\?\"’.°!;'€%:,(/]", "", expanded)

    # Invisible or non-standard separators, and hyphens, become plain spaces.
    return re.sub(r"\u200b|\xa0|-", " ", without_symbols)