|
a |
|
b/tests/pipelines/misc/test_tables.py |
|
|
1 |
import pytest |
|
|
2 |
from spacy.tokens.span import Span |
|
|
3 |
|
|
|
4 |
# Sample document passed to the pipeline in `test_tables`.
# As visible below, it contains, in order:
#   - nine rows (Leucocytes ... VMP) whose cells are separated by "¦";
#   - a short "|"-delimited table: one header row and one data row;
#   - a longer "|"-delimited table: one header row and nine data rows, where
#     the "Hématocrite" row has an empty unit cell and the "TCMH" / "CCMH"
#     rows have fewer cells than the header.
# Free-text lines ("qdfsdf", "2/2Pat : ...") sit between the tables.
TEXT = """ |
|
|
5 |
Le patientqsfqfdf bla bla bla |
|
|
6 |
Leucocytes ¦x10*9/L ¦4.97 ¦4.09-11 |
|
|
7 |
Hématies ¦x10*12/L¦4.68 ¦4.53-5.79 |
|
|
8 |
Hémoglobine ¦g/dL ¦14.8 ¦13.4-16.7 |
|
|
9 |
Hématocrite ¦% ¦44.2 ¦39.2-48.6 |
|
|
10 |
VGM ¦fL ¦94.4 + ¦79.6-94 |
|
|
11 |
TCMH ¦pg ¦31.6 ¦27.3-32.8 |
|
|
12 |
CCMH ¦g/dL ¦33.5 ¦32.4-36.3 |
|
|
13 |
Plaquettes ¦x10*9/L ¦191 ¦172-398 |
|
|
14 |
VMP ¦fL ¦11.5 + ¦7.4-10.8 |
|
|
15 |
|
|
|
16 |
qdfsdf |
|
|
17 |
|
|
|
18 |
2/2Pat : <NOM> <Prenom> |<date> | <ipp> |Intitulé RCP |
|
|
19 |
|
|
|
20 |
Table de taille <= 3 : |
|
|
21 |
|
|
|
22 |
|Libellé | Unité | Valeur | Intervalle | |
|
|
23 |
|Leucocytes |x10*9/L |4.97 | 4.09-11 | |
|
|
24 |
|
|
|
25 |
qdfsdf |
|
|
26 |
|
|
|
27 |
|Libellé | Unité | Valeur | Intervalle | |
|
|
28 |
|Leucocytes |x10*9/L |4.97 | 4.09-11 | |
|
|
29 |
|Hématies |x10*12/L|4.68 | 4.53-5.79 | |
|
|
30 |
|Hémoglobine |g/dL |14.8 | 13.4-16.7 | |
|
|
31 |
|Hématocrite ||44.2 | 39.2-48.6 | |
|
|
32 |
|VGM |fL | 94.4 + | 79.6-94 | |
|
|
33 |
|TCMH |pg |31.6 | |
|
|
34 |
|CCMH |g/dL |
|
|
35 |
|Plaquettes |x10*9/L |191 | 172-398 | |
|
|
36 |
|VMP |fL |11.5 + | 7.4-10.8 | |
|
|
37 |
|
|
|
38 |
""" |
|
|
39 |
|
|
|
40 |
|
|
|
41 |
def test_tables(blank_nlp):
    """Run ``eds.tables`` over ``TEXT`` and check the pandas exports.

    The test is skipped unless ``blank_nlp.lang`` is ``"eds"``. It adds the
    ``eds.normalizer`` and ``eds.tables`` (``min_rows=3``) pipes, expects
    exactly two entries in ``doc.spans["tables"]``, then checks:

    * the first table exported with default options (4 columns, 9 rows,
      ``"TCMH"`` at position ``[5, 0]``);
    * the second table exported with ``header=True, index=True,
      as_spans=True`` (column labels, row labels, and one cell that must be
      a ``Span`` whose text is ``"31.6"``).
    """
    if blank_nlp.lang != "eds":
        pytest.skip("Test only for eds language")

    blank_nlp.add_pipe("eds.normalizer")
    blank_nlp.add_pipe("eds.tables", config={"min_rows": 3})

    # Keep `doc` bound for the whole test while its span group is in use.
    doc = blank_nlp(TEXT)
    detected = doc.spans["tables"]
    # NOTE(review): presumably the two-row table in TEXT is the one dropped
    # by min_rows=3 -- confirm against the eds.tables component.
    assert len(detected) == 2

    # First detected table: default export.
    plain_frame = detected[0]._.to_pd_table()
    assert len(plain_frame.columns) == 4
    assert len(plain_frame) == 9
    assert str(plain_frame.iloc[5, 0]) == "TCMH"

    # Second detected table: first row as header, first column as index,
    # cells requested as spans.
    expected_columns = ["Unité", "Valeur", "Intervalle"]
    expected_index = [
        "Leucocytes",
        "Hématies",
        "Hémoglobine",
        "Hématocrite",
        "VGM",
        "TCMH",
        "CCMH",
        "Plaquettes",
        "VMP",
    ]
    span_frame = detected[1]._.to_pd_table(
        header=True,
        index=True,
        as_spans=True,
    )
    assert span_frame.columns.tolist() == expected_columns
    assert span_frame.index.tolist() == expected_index

    tcmh_value = span_frame.loc["TCMH", "Valeur"]
    assert isinstance(tcmh_value, Span)
    assert tcmh_value.text == "31.6"