[507a54]: / production / rasa-server / rasa / spell_checker.py

Download this file

101 lines (80 with data), 3.5 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
import logging
import typing
from typing import Any, Optional, Text, Dict, List, Type

from symspellpy import SymSpell

from rasa.nlu.components import Component
from rasa.nlu.config import RasaNLUModelConfig
from rasa.shared.nlu.training_data.message import Message
from rasa.shared.nlu.training_data.training_data import TrainingData

if typing.TYPE_CHECKING:
    from rasa.nlu.model import Metadata
class SpellChecker(Component):
    """Rasa NLU component that spell-corrects the message text with SymSpell.

    On construction the component loads four frequency dictionaries (general
    English and medical, each as single words and as bigrams) into one
    ``SymSpell`` instance.  ``process`` then replaces the ``"text"`` of each
    incoming message with the top compound-lookup suggestion.

    The component has nothing to train and nothing to persist: the
    dictionaries are read again from disk every time an instance is built.
    """

    defaults = {}
    supported_language_list = ["en"]

    # Texts with this many characters or fewer are passed through unchanged.
    _MAX_UNCORRECTED_LENGTH = 3

    @classmethod
    def required_components(cls) -> List[Type[Component]]:
        """Return the components that must precede this one (none)."""
        return []

    def __init__(self, component_config: Optional[Dict[Text, Any]] = None) -> None:
        """Build the SymSpell index from the bundled frequency dictionaries.

        Args:
            component_config: Pipeline configuration for this component; it
                is forwarded unchanged to ``Component.__init__``.
        """
        super().__init__(component_config)

        # Path of dictionaries.  These are relative paths, so they are
        # presumably resolved against the working directory of the process
        # that loads the model -- verify against the deployment layout.
        self.dictionary_path = "dictionary/frequency_dictionary.txt"
        self.med_dictionary_path = "dictionary/frequency_med_dictionary.txt"
        self.bigram_path = "dictionary/frequency_bigramdictionary.txt"
        self.med_bigram_path = "dictionary/frequency_med_bigramdictionary.txt"

        # Corrects English and medical typos.
        self.sym_spell = SymSpell(max_dictionary_edit_distance=2, prefix_length=7)

        # Single-word dictionaries: term in column 0, count in column 1.
        self._load_or_warn(
            self.sym_spell.load_dictionary, self.dictionary_path, count_index=1
        )
        self._load_or_warn(
            self.sym_spell.load_dictionary, self.med_dictionary_path, count_index=1
        )
        # Bigram dictionaries: the term spans columns 0-1, count in column 2.
        self._load_or_warn(
            self.sym_spell.load_bigram_dictionary, self.bigram_path, count_index=2
        )
        self._load_or_warn(
            self.sym_spell.load_bigram_dictionary,
            self.med_bigram_path,
            count_index=2,
        )

    @staticmethod
    def _load_or_warn(
        loader: "typing.Callable[..., Any]", path: Text, count_index: int
    ) -> None:
        """Run one SymSpell ``load_*`` call and warn if the file was not loaded.

        Args:
            loader: Bound ``load_dictionary`` or ``load_bigram_dictionary``.
            path: Dictionary file handed to ``loader``.
            count_index: Column that holds the frequency count.
        """
        loaded = loader(path, term_index=0, count_index=count_index)
        # The loaders report a missing file by returning False instead of
        # raising; without this check the component would silently run with
        # an incomplete vocabulary.  Only an explicit False is treated as a
        # failure so a loader that returns nothing is not misreported.
        if loaded is False:
            logging.getLogger(__name__).warning(
                "SpellChecker could not load dictionary file '%s'; "
                "spelling correction will be incomplete.",
                path,
            )

    def train(
        self,
        training_data: TrainingData,
        config: Optional[RasaNLUModelConfig] = None,
        **kwargs: Any,
    ) -> None:
        """Do nothing: the component relies only on its static dictionaries."""

    def correct(self, input_term: Text) -> Text:
        """Return the best SymSpell compound correction for ``input_term``.

        Args:
            input_term: Raw text to correct.

        Returns:
            The term of the first suggestion, or ``input_term`` itself if
            SymSpell returns no suggestion.
        """
        # Get suggestions list from SymSpell
        suggestions = self.sym_spell.lookup_compound(
            input_term,
            max_edit_distance=2,
            split_phrase_by_space=True,
            ignore_term_with_digits=True,
            ignore_non_words=True,
            transfer_casing=True,
        )
        if not suggestions:
            return input_term
        # Use the public ``term`` property rather than the private ``_term``.
        return suggestions[0].term

    def process(self, message: Message, **kwargs: Any) -> None:
        """Replace the message text with its spell-corrected form.

        Texts of three characters or fewer are written back unchanged.  A
        message without a ``"text"`` value is left untouched.

        Args:
            message: Message whose ``"text"`` is read and overwritten.
            **kwargs: Unused; accepted for interface compatibility.
        """
        # Get user message
        input_term = message.get("text")
        if input_term is None:
            # Nothing to correct; ``len(None)`` would raise a TypeError.
            return

        if len(input_term) > self._MAX_UNCORRECTED_LENGTH:
            output_text = self.correct(input_term)
        else:
            output_text = input_term
        message.set('text', output_text, add_to_output=True)

    def persist(self, file_name: Text, model_dir: Text) -> Optional[Dict[Text, Any]]:
        """Persist nothing and return ``None``; there is no trained state."""
        return None

    @classmethod
    def load(
        cls,
        meta: Dict[Text, Any],
        model_dir: Text,
        model_metadata: Optional["Metadata"] = None,
        cached_component: Optional["Component"] = None,
        **kwargs: Any,
    ) -> "Component":
        """Return the cached component if given, else build a new instance.

        Args:
            meta: Component configuration, passed to the constructor.
            model_dir: Unused; nothing is read from the model directory.
            model_metadata: Unused.
            cached_component: Previously loaded instance to reuse, if any.
            **kwargs: Unused.

        Returns:
            ``cached_component`` when it is truthy, otherwise ``cls(meta)``.
        """
        if cached_component:
            return cached_component
        return cls(meta)