criteria_parser / Git / [811e40] /run.py

Models:
joseph-gordon/
criteria_parser
Downloads: 1
[811e40]: / run.py
History
Download this file
80 lines (63 with data), 2.7 kB

import fire
import json
import pickle
from pathlib import Path
from src.data import load_chia, load_fb
from src.prompt import few_shot_entity_recognition
from tqdm import tqdm


def process_chia(n: int = None, random: bool = False):
    """Prints criteria, reference entities and predictions for the Chia dataset

    For each selected row the criteria text is printed, followed by the row's
    "drugs", "persons" and "conditions" values, the output of
    ``few_shot_entity_recognition`` and a separator line. The "TRUE: " and
    "PREDICTED: " labels are only emitted when ``random`` is set.

    Args:
        n (int, optional): Number of rows to read. Defaults to None (all rows).
        random (bool, optional): Whether to read rows randomly. Defaults to False.
    """
    frame = load_chia()

    # Sampling the full fraction shuffles the rows, so the leading n rows of
    # the shuffled frame form a random subset.
    selected = frame.sample(frac=1.)[:n] if random else frame[:n]

    # Labels are prepended only in random mode; the sequential mode prints the
    # bare values.
    true_prefix = ("TRUE: ",) if random else ()
    pred_prefix = ("PREDICTED: ",) if random else ()
    separator = "-" * 100

    for _, record in selected.iterrows():
        text = record["criteria"]
        print(text)
        print(*true_prefix, record["drugs"], record["persons"], record["conditions"])
        # NOTE(review): ner_fb calls few_shot_entity_recognition with
        # (examples, criterion, entity); this single-argument call may be
        # stale - confirm against src.prompt.
        print(*pred_prefix, few_shot_entity_recognition(text))
        print(separator)


def ner_fb(entity: str, n: int = None, random: bool = False, verbose: bool = False):
    """Applies the LLM prompting to extract NERs from the FB dataset

    Each selected row of the "test" entry returned by ``load_fb()`` is passed to
    ``few_shot_entity_recognition`` together with the few-shot examples stored
    under ``entity`` in ``data/few-shots.json``. The collected
    ``(entity, criterion, ent_true, ent_pred)`` tuples are pickled to
    ``data/{entity}_ner_results.pkl``.

    Args:
        entity (str): Entity type. Used as the key into the few-shot examples
            file, as the name of the column holding the reference value, and in
            the name of the output file.
        n (int, optional): Number of rows to read. Defaults to None (all rows).
        random (bool, optional): Whether to read rows randomly. Defaults to False.
        verbose (bool, optional): Whether to print the results. Defaults to False.

    Raises:
        KeyError: If the few-shot examples mapping has no ``entity`` key.
    """
    df = load_fb()["test"]

    # JSON text is UTF-8; do not rely on the platform's default encoding.
    with open(Path("data/few-shots.json"), "r", encoding="utf-8") as f:
        few_shot_examples = json.load(f)[entity]

    # Sampling the full fraction shuffles the rows, so the leading n rows of
    # the shuffled frame form a random subset.
    rows = df.sample(frac=1.)[:n] if random else df[:n]

    results = []
    # iterrows() is a generator with no length, so tqdm needs an explicit
    # total to display percentage and remaining time.
    for _, row in tqdm(rows.iterrows(), total=len(rows)):
        criterion = row["criterion"]
        ent_true = row[entity]
        ent_pred = few_shot_entity_recognition(few_shot_examples, criterion, entity)

        results.append((entity, criterion, ent_true, ent_pred))

    output_file = Path(f"data/{entity}_ner_results.pkl")
    with open(output_file, "wb") as f:
        pickle.dump(results, f)

    if verbose:
        # The entity slot is ignored here so the `entity` parameter is not
        # rebound by the loop.
        for _, criterion, ent_true, ent_pred in results:
            print(criterion)
            print("TRUE: ", ent_true)
            print("PREDICTED: ", ent_pred)
            print("-" * 100)


if __name__ == "__main__":
    # Called without a component, Fire builds the command line interface from
    # this module's contents, e.g. `python run.py ner_fb --entity=<type> --n=10`.
    fire.Fire()