|
a |
|
b/edsnlp/conjugator.py |
|
|
1 |
from typing import Dict, List, Union |
|
|
2 |
|
|
|
3 |
import mlconjug3 |
|
|
4 |
import pandas as pd |
|
|
5 |
|
|
|
6 |
|
|
|
7 |
def conjugate_verb(
    verb: str,
    conjugator: "mlconjug3.Conjugator",
) -> pd.DataFrame:
    """
    Conjugate a single verb and format the results in a pandas `DataFrame`.

    The rows produced by ``conjugator.conjugate(verb).iterate()`` are expected
    to be either ``(mode, tense, person, term)`` or, for forms that carry no
    person, ``(mode, tense, term)``. Three-element rows are expanded with an
    empty person so that every row has the same layout.

    Parameters
    ----------
    verb : str
        Verb to conjugate.
    conjugator : mlconjug3.Conjugator
        mlconjug3 instance for conjugating.

    Returns
    -------
    pd.DataFrame
        Normalized dataframe containing all conjugated forms for the verb.
        Columns: `verb`, `mode`, `tense`, `person`, `term`.
        `person` is null for forms that have no person. `term` is left null
        when the conjugator reports no form for a given person.
    """

    columns = ["mode", "tense", "person", "term"]

    # Normalise on row length rather than on "person == term" after a fill:
    # that way a missing form is never replaced by its person label, and a
    # verb yielding only person-less rows still fits the four columns.
    rows = [
        (row[0], row[1], None, row[2]) if len(row) == 3 else tuple(row)
        for row in conjugator.conjugate(verb).iterate()
    ]

    df = pd.DataFrame(rows, columns=columns)
    df.insert(0, "verb", verb)

    return df
|
|
40 |
|
|
|
41 |
|
|
|
42 |
def conjugate(
    verbs: Union[str, List[str]],
    language: str = "fr",
) -> pd.DataFrame:
    """
    Conjugate a list of verbs.

    Parameters
    ----------
    verbs : Union[str, List[str]]
        Verb, or list of verbs, to conjugate. A single string is treated as
        a one-element list.
    language: str
        Language to conjugate. Defaults to French (`fr`).

    Returns
    -------
    pd.DataFrame
        Dataframe containing the conjugations for the provided verbs.
        Columns: `verb`, `mode`, `tense`, `person`, `term`.
        An empty dataframe with these columns is returned when no verb is
        provided.
    """
    if isinstance(verbs, str):
        verbs = [verbs]
    else:
        # Materialise so that emptiness can be tested for any iterable.
        verbs = list(verbs)

    if not verbs:
        # `pd.concat` raises on an empty list; return an empty, well-formed
        # frame instead (no conjugator is instantiated in that case).
        return pd.DataFrame(columns=["verb", "mode", "tense", "person", "term"])

    # One conjugator instance is shared by every verb.
    conjugator = mlconjug3.Conjugator(language=language)

    frames = [conjugate_verb(verb, conjugator=conjugator) for verb in verbs]

    # `ignore_index=True` gives the result a fresh 0..n-1 index.
    return pd.concat(frames, ignore_index=True)
|
|
72 |
|
|
|
73 |
|
|
|
74 |
def get_conjugated_verbs(
    verbs: Union[str, List[str]],
    matches: Union[List[Dict[str, str]], Dict[str, str]],
    language: str = "fr",
) -> List[str]:
    """
    Get a list of conjugated verbs.

    Parameters
    ----------
    verbs : Union[str, List[str]]
        List of verbs to conjugate.
    matches : Union[List[Dict[str, str]], Dict[str, str]]
        Dictionary, or list of dictionaries, describing the
        mode/tense/persons to keep. Each dictionary maps a column of the
        conjugation dataframe to the value it must equal; a row is kept by a
        dictionary when it satisfies all of its entries, and the results of
        the different dictionaries are merged. An empty dictionary keeps
        every row.
    language : str, optional
        Language to conjugate, by default "fr" (French)

    Returns
    -------
    List[str]
        List of terms to look for, without duplicates, in order of first
        appearance.

    Raises
    ------
    KeyError
        If a key of `matches` is not a column of the conjugation dataframe.

    Examples
    --------
    >>> get_conjugated_verbs(
    ...     "aimer",
    ...     dict(mode="Indicatif", tense="Présent", person="1p"),
    ... )
    ['aimons']
    """

    if isinstance(matches, dict):
        matches = [matches]

    df = conjugate(
        verbs=verbs,
        language=language,
    )

    terms = []

    for match in matches:
        # Filter with boolean masks rather than a string-built `query`
        # expression, so that values containing quotes are compared
        # literally instead of being parsed as part of the expression.
        mask = pd.Series(True, index=df.index, dtype=bool)
        for column, value in match.items():
            mask &= df[column] == value

        # Skip missing forms so that only actual terms are returned.
        terms.extend(df.loc[mask, "term"].dropna().unique())

    # De-duplicate while keeping a deterministic, first-seen order.
    return list(dict.fromkeys(terms))