[e1b945]: / modules / chatbot / preprocessor.py

Download this file

35 lines (26 with data), 660 Bytes

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import re
from modules.chatbot.const import CONTRACTIONS
def decontracted(phrase):
    """
    Expand every contraction found in a phrase.

    Each key of ``CONTRACTIONS`` is searched for as a plain, case-sensitive
    substring and every occurrence is swapped for the mapped value. The
    replacements are applied one after another in the mapping's iteration
    order, so a later entry sees the text produced by the earlier ones.

    Args:
        phrase (str): The input phrase.

    Returns:
        str: The phrase with all known contractions expanded.
    """
    expanded = phrase
    for short_form, long_form in CONTRACTIONS.items():
        expanded = expanded.replace(short_form, long_form)
    return expanded
def preprocess(text):
    """
    Normalize raw text: lowercase, expand contractions, strip punctuation.

    The steps run in a fixed order:

    1. The text is lowercased.
    2. Contractions are expanded with ``decontracted``. This happens before
       punctuation is stripped, so apostrophes are still present when the
       contractions are looked up.
    3. The listed punctuation and symbol characters are deleted outright.
    4. Zero-width spaces, non-breaking spaces and hyphens are each replaced
       by a regular space.

    Args:
        text (str): The input text.

    Returns:
        str: Preprocessed text.
    """
    expanded = decontracted(text.lower())
    # Remove punctuation/symbols entirely (no replacement character).
    without_symbols = re.sub(r"[$)\?\"’.°!;'€%:,(/]", "", expanded)
    # Invisible or joining characters become ordinary spaces so that the
    # words they separate are not glued together.
    return re.sub(r"\u200b|\xa0|-", " ", without_symbols)