# tests/pipelines/misc/test_tables.py
1
import pytest
2
from spacy.tokens.span import Span
3
4
# Fixture document for the table tests. It contains, in order:
#   * a block of rows delimited with the broken-bar character "¦",
#   * a lone pipe-delimited header line,
#   * a short pipe-delimited table (header + one row),
#   * a longer pipe-delimited table with a header row, one empty cell and
#     two truncated rows (TCMH, CCMH).
# Rows of a table must stay on consecutive lines: stray lines between them
# (e.g. line numbers pasted from a code viewer) break the table layout.
TEXT = """
Le patientqsfqfdf bla bla bla
Leucocytes ¦x10*9/L ¦4.97 ¦4.09-11
Hématies ¦x10*12/L¦4.68 ¦4.53-5.79
Hémoglobine ¦g/dL ¦14.8 ¦13.4-16.7
Hématocrite ¦% ¦44.2 ¦39.2-48.6
VGM ¦fL ¦94.4 + ¦79.6-94
TCMH ¦pg ¦31.6 ¦27.3-32.8
CCMH ¦g/dL ¦33.5 ¦32.4-36.3
Plaquettes ¦x10*9/L ¦191 ¦172-398
VMP ¦fL ¦11.5 + ¦7.4-10.8

qdfsdf

2/2Pat : <NOM> <Prenom> |<date> | <ipp> |Intitulé RCP

Table de taille <= 3 :

 |Libellé | Unité | Valeur | Intervalle |
 |Leucocytes |x10*9/L |4.97 | 4.09-11 |

qdfsdf

 |Libellé | Unité | Valeur | Intervalle |
 |Leucocytes |x10*9/L |4.97 | 4.09-11 |
 |Hématies |x10*12/L|4.68 | 4.53-5.79 |
 |Hémoglobine |g/dL |14.8 | 13.4-16.7 |
 |Hématocrite ||44.2 | 39.2-48.6 |
 |VGM |fL | 94.4 + | 79.6-94 |
 |TCMH |pg |31.6 |
 |CCMH |g/dL
 |Plaquettes |x10*9/L |191 | 172-398 |
 |VMP |fL |11.5 + | 7.4-10.8 |

"""
39
40
41
def test_tables(blank_nlp):
    """Check table detection and DataFrame conversion on the ``TEXT`` fixture.

    The pipeline is built from ``eds.normalizer`` followed by ``eds.tables``
    configured with ``min_rows=3``. The test then verifies that:

    * exactly two tables end up in ``doc.spans["tables"]``;
    * the first table converts to a 9-row, 4-column DataFrame;
    * the second table, converted with ``header=True, index=True,
      as_spans=True``, exposes its first row as column labels, its first
      column as index, and its cells as spaCy ``Span`` objects.

    Skipped when the ``blank_nlp`` fixture is not the ``eds`` language.
    """
    if blank_nlp.lang != "eds":
        pytest.skip("Test only for eds language")

    blank_nlp.add_pipe("eds.normalizer")
    blank_nlp.add_pipe("eds.tables", config=dict(min_rows=3))

    doc = blank_nlp(TEXT)

    # NOTE(review): the short "Table de taille <= 3" block in TEXT is
    # presumably the one dropped by min_rows=3 - confirm against eds.tables.
    assert len(doc.spans["tables"]) == 2

    # First table: default conversion, positional access only.
    span = doc.spans["tables"][0]
    df = span._.to_pd_table()
    assert len(df.columns) == 4
    assert len(df) == 9
    assert str(df.iloc[5, 0]) == "TCMH"

    # Second table: first row becomes the header, first column the index,
    # and cells are kept as Span objects rather than plain strings.
    span = doc.spans["tables"][1]
    df = span._.to_pd_table(header=True, index=True, as_spans=True)
    assert df.columns.tolist() == [
        "Unité",
        "Valeur",
        "Intervalle",
    ]
    assert df.index.tolist() == [
        "Leucocytes",
        "Hématies",
        "Hémoglobine",
        "Hématocrite",
        "VGM",
        "TCMH",
        "CCMH",
        "Plaquettes",
        "VMP",
    ]
    cell = df.loc["TCMH", "Valeur"]
    assert isinstance(cell, Span)
    assert cell.text == "31.6"