[811e40]: / run.py

Download this file

80 lines (63 with data), 2.7 kB

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import json
import pickle
from pathlib import Path
from typing import Optional

import fire
from tqdm import tqdm

from src.data import load_chia, load_fb
from src.prompt import few_shot_entity_recognition
def process_chia(n: Optional[int] = None, random: bool = False):
    """Process the Chia dataset, printing true vs. predicted entities per row.

    For each selected row, prints the eligibility criteria text, the gold
    drug/person/condition annotations, and the model prediction, separated
    by a dashed rule.

    Args:
        n (int, optional): Number of rows to read. Defaults to None (all rows).
        random (bool, optional): Whether to read rows in random order.
            Defaults to False.
    """
    df = load_chia()
    # Shuffle the whole frame first when random order is requested; the
    # slice below then caps the output at ``n`` rows in either mode
    # (``[:None]`` is a full slice, so n=None means "all rows").
    rows = df.sample(frac=1.0) if random else df
    for _, row in rows[:n].iterrows():
        print(row["criteria"])
        print("TRUE: ", row["drugs"], row["persons"], row["conditions"])
        # NOTE(review): called with a single argument here but with three in
        # ner_fb below — confirm few_shot_entity_recognition's signature.
        print("PREDICTED: ", few_shot_entity_recognition(row["criteria"]))
        print("-" * 100)
def ner_fb(entity: str, n: Optional[int] = None, random: bool = False, verbose: bool = False):
    """Apply LLM prompting to extract named entities from the FB dataset.

    Runs few-shot entity recognition over the test split and pickles the
    collected ``(entity, criterion, true, predicted)`` tuples to
    ``data/{entity}_ner_results.pkl``.

    Args:
        entity (str): Entity type; also the key into ``data/few-shots.json``.
        n (int, optional): Number of rows to read. Defaults to None (all rows).
        random (bool, optional): Whether to read rows in random order.
            Defaults to False.
        verbose (bool, optional): Whether to print the results. Defaults to False.
    """
    df = load_fb()["test"]

    # Separate name for the path so it is not clobbered by the loaded data.
    few_shots_path = Path("data/few-shots.json")
    with open(few_shots_path, "r") as f:
        few_shot_examples = json.load(f)[entity]

    # Shuffle first when random order is requested; ``[:n]`` then caps the
    # number of rows in either mode (n=None means "all rows").
    rows = df.sample(frac=1.0) if random else df
    results = []
    for _, row in tqdm(rows[:n].iterrows()):
        criterion = row["criterion"]
        ent_true = row[entity]
        ent_pred = few_shot_entity_recognition(few_shot_examples, criterion, entity)
        results.append((entity, criterion, ent_true, ent_pred))

    output_file = Path(f"data/{entity}_ner_results.pkl")
    with open(output_file, "wb") as f:
        pickle.dump(results, f)

    if verbose:
        # ``ent`` avoids shadowing the ``entity`` parameter.
        for ent, criterion, ent_true, ent_pred in results:
            print(criterion)
            print("TRUE: ", ent_true)
            print("PREDICTED: ", ent_pred)
            print("-" * 100)
if __name__ == "__main__":
    # Expose the module's functions (process_chia, ner_fb) as CLI commands
    # via python-fire, e.g. ``python run.py ner_fb --entity=drug``.
    fire.Fire()