# modules/chatbot/preprocessor.py
1
import re
2
from modules.chatbot.const import CONTRACTIONS
3
4
5
def decontracted(phrase):
    """
    Expand the contractions found in a phrase.

    Every key of ``CONTRACTIONS`` that occurs in the phrase is substituted
    with its mapped value via plain substring replacement, visiting the
    entries in the mapping's iteration order.

    Args:
        phrase (str): The input phrase.

    Returns:
        str: The phrase with all known contractions replaced.
    """
    expanded = phrase
    for contraction in CONTRACTIONS:
        # Each pass works on the result of the previous replacement.
        expanded = expanded.replace(contraction, CONTRACTIONS[contraction])
    return expanded
18
19
20
def preprocess(text):
    """
    Normalize raw text.

    The text is lower-cased, its contractions are expanded with
    ``decontracted``, a fixed set of punctuation and symbol characters is
    removed, and zero-width spaces, non-breaking spaces and hyphens are
    each turned into a regular space.

    Args:
        text (str): The input text.

    Returns:
        str: The normalized text.
    """
    # Contractions are expanded before punctuation removal, because the
    # removal step below deletes both straight and curly apostrophes.
    expanded = decontracted(text.lower())
    without_punctuation = re.sub(r"[$)\?\"’.°!;'€%:,(/]", "", expanded)
    # Invisible/non-breaking spaces and hyphens become plain spaces.
    return re.sub(r"\u200b|\xa0|-", " ", without_punctuation)