a b/edsnlp/conjugator.py
1
from typing import Dict, List, Union
2
3
import mlconjug3
4
import pandas as pd
5
6
7
def conjugate_verb(
    verb: str,
    conjugator: mlconjug3.Conjugator,
) -> pd.DataFrame:
    """
    Conjugate a verb with an mlconjug3 instance and format
    the result as a pandas `DataFrame`.

    Rows returned by ``conjugator.conjugate(verb).iterate()`` are aligned
    by their length: a three-field row is read as ``(mode, tense, term)``
    and gets an empty ``person``, any other row is passed through as
    ``(mode, tense, person, term)``. Rows without a conjugated form are
    dropped, so a person label is never mistaken for a term.

    Parameters
    ----------
    verb : str
        Verb to conjugate.
    conjugator : mlconjug3.Conjugator
        mlconjug3 instance for conjugating.

    Returns
    -------
    pd.DataFrame
        Normalized dataframe containing all conjugated forms
        for the verb.
        Columns: `verb`, `mode`, `tense`, `person`, `term`
    """

    columns = ["mode", "tense", "person", "term"]

    # NOTE(review): assumes three-field rows are (mode, tense, term), i.e.
    # forms that carry no person - confirm against mlconjug3's `iterate`.
    rows = [
        (row[0], row[1], None, row[2]) if len(row) == 3 else row
        for row in conjugator.conjugate(verb).iterate()
    ]

    df = pd.DataFrame(rows, columns=columns)

    # A row whose form is missing has nothing to match on. Filling the term
    # from the person column (as a blanket `fillna` would) turns labels such
    # as "1s" into bogus conjugated terms, so drop those rows instead.
    df = df.dropna(subset=["term"])

    # When the person field merely repeats the form, it is not a real
    # person label: blank it out.
    df.loc[df["person"] == df["term"], "person"] = None

    df = df.reset_index(drop=True)
    df.insert(0, "verb", verb)

    return df
40
41
42
def conjugate(
    verbs: Union[str, List[str]],
    language: str = "fr",
) -> pd.DataFrame:
    """
    Conjugate a verb or a list of verbs.

    Parameters
    ----------
    verbs : Union[str, List[str]]
        Verb, or list of verbs, to conjugate.
    language : str
        Language to conjugate. Defaults to French (`fr`).

    Returns
    -------
    pd.DataFrame
        Dataframe containing the conjugations for the provided verbs,
        with a fresh 0..n-1 index.
        Columns: `verb`, `mode`, `tense`, `person`, `term`.
        Empty (but with the same columns) when no verb is provided.
    """
    # A lone string is one verb, not an iterable of characters.
    verbs = [verbs] if isinstance(verbs, str) else list(verbs)

    # `pd.concat` raises on an empty list; answer with an empty frame, and
    # do so before paying for the conjugator construction.
    if not verbs:
        return pd.DataFrame(columns=["verb", "mode", "tense", "person", "term"])

    # One conjugator instance is shared by every verb.
    conjugator = mlconjug3.Conjugator(language=language)

    frames = [conjugate_verb(verb, conjugator=conjugator) for verb in verbs]

    return pd.concat(frames, ignore_index=True)
72
73
74
def get_conjugated_verbs(
    verbs: Union[str, List[str]],
    matches: Union[List[Dict[str, str]], Dict[str, str]],
    language: str = "fr",
) -> List[str]:
    """
    Get a list of conjugated verbs.

    Parameters
    ----------
    verbs : Union[str, List[str]]
        Verb, or list of verbs, to conjugate.
    matches : Union[List[Dict[str, str]], Dict[str, str]]
        Dictionary, or list of dictionaries, describing the forms to keep.
        Keys are columns of the conjugation dataframe (`verb`, `mode`,
        `tense`, `person`, `term`). A form is kept when it equals the
        requested value on every key of at least one dictionary.
    language : str, optional
        Language to conjugate, by default "fr" (French)

    Returns
    -------
    List[str]
        List of terms to look for, without duplicates, in order of
        first appearance.

    Raises
    ------
    ValueError
        If one of the dictionaries in `matches` is empty.
    KeyError
        If a key in `matches` is not a column of the conjugation dataframe.

    Examples
    --------
    >>> get_conjugated_verbs(
    ...     "aimer",
    ...     dict(mode="Indicatif", tense="Présent", person="1p"),
    ... )
    ['aimons']
    """

    matches = [matches] if isinstance(matches, dict) else list(matches)

    # Nothing to select: skip the conjugation altogether.
    if not matches:
        return []

    if any(not match for match in matches):
        raise ValueError("Each entry of `matches` must define at least one filter.")

    df = conjugate(
        verbs=verbs,
        language=language,
    )

    terms = []

    for match in matches:
        # Compare values directly instead of interpolating them into a
        # `DataFrame.query` string: a value containing a double quote can no
        # longer break, or alter, the evaluated expression.
        mask = pd.Series(True, index=df.index)
        for column, value in match.items():
            mask &= df[column] == value

        terms.extend(df.loc[mask, "term"].unique())

    # `dict.fromkeys` removes duplicates while keeping a deterministic order.
    return list(dict.fromkeys(terms))