# eval_rag.py
# RAG evaluation was hard for me; these steps are adapted from the RAGAS documentation online.
from datasets import Dataset

eval_dataset = Dataset.from_csv("haa_develAdmittimes.csv")

eval_dataset
!pip install ragas -qU
from ragas.metrics import (
    answer_relevancy,   # is the answer relevant to the question?
    faithfulness,       # is the answer grounded in the retrieved contexts?
    context_recall,     # do the retrieved contexts cover the ground truth?
    context_precision,  # are the relevant contexts ranked highly?
)
from ragas.metrics.critique import harmfulness
from ragas import evaluate
# eval_dataset columns: subject_id, hadm_id, timestamp, observations
# Combine admission id and admit time into a single text field for retrieval.
haa_develAdmittimes['combined'] = haa_develAdmittimes['hadm_id'].astype(str) + " at " + haa_develAdmittimes['admittime'].astype(str)

def create_ragas_dataset(rag_pipeline, eval_dataset):
  rag_dataset = []
  for row in tqdm(eval_dataset):
    answer = rag_pipeline({"query" : row["timestamp"]})
    # RAGAS expects the columns question / answer / contexts / ground_truths;
    # subject_id is carried along for reference.
    rag_dataset.append(
        {"subject_id" : row["subject_id"],
         "question" : row["timestamp"],
         "answer" : answer["result"],
         "contexts" : [context.page_content for context in answer["source_documents"]],
         "ground_truths" : [row["observations"]]
         }
    )
  rag_df = pd.DataFrame(rag_dataset)
  rag_eval_dataset = Dataset.from_pandas(rag_df)
  return rag_eval_dataset
def evaluate_ragas_dataset(ragas_dataset):
  result = evaluate(
    ragas_dataset,
    metrics=[
        context_precision,
        faithfulness,
        answer_relevancy,
        context_recall,
    ],
  )
  return result
"""Lets create our dataset first:"""
54
55
from tqdm import tqdm
import pandas as pd
basic_qa_ragas_dataset = create_ragas_dataset(qa_chain, eval_dataset)
"""Save it for later:"""
61
62
basic_qa_ragas_dataset.to_csv("basic_qa_ragas_dataset.csv")
"""And finally - evaluate how it did!"""
65
66
basic_qa_result = evaluate_ragas_dataset(basic_qa_ragas_dataset)
basic_qa_result
"""### Testing Other Retrievers
71
72
Now we can test our how changing our Retriever impacts our RAGAS evaluation!
73
"""
74
75
from langchain.chains import RetrievalQA

def create_qa_chain(medical_retriever):
  primary_qa_llm = llm

  # RetrievalQA takes the retriever via the `retriever` keyword argument.
  created_qa_chain = RetrievalQA.from_chain_type(
      primary_qa_llm,
      retriever=medical_retriever,
      return_source_documents=True
  )

  return created_qa_chain
"""#### Parent Document Retriever
87
88
One of the easier ways we can imagine improving a retriever is to embed our documents into small chunks, and then retrieve a significant amount of additional context that "surrounds" the found context.
89
90
You can read more about this method [here](https://python.langchain.com/docs/modules/data_connection/retrievers/parent_document_retriever)!
91
"""
92
93
!pip install chromadb -qU
from langchain.retrievers import ParentDocumentRetriever
from langchain.storage import InMemoryStore
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
parent_splitter = RecursiveCharacterTextSplitter(chunk_size=750)
child_splitter = RecursiveCharacterTextSplitter(chunk_size=200)

vectorstore = Chroma(collection_name="split_parents", embedding_function=embeddings_model)

store = InMemoryStore()
parent_document_retriever = ParentDocumentRetriever(
    vectorstore=vectorstore,
    docstore=store,
    child_splitter=child_splitter,
    parent_splitter=parent_splitter,
)

parent_document_retriever.add_documents(base_docs)
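"""Quick sanity check: the retriever searches over the small 200-character child chunks but returns the larger 750-character parent chunks. A minimal sketch (the query string below is illustrative only):"""

retrieved_docs = parent_document_retriever.get_relevant_documents("hadm_id 112213")
for doc in retrieved_docs:
    # Each hit should be a parent chunk, larger than the child chunk that matched.
    print(len(doc.page_content), doc.page_content[:80])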
"""Let's create, test, and then evaluate our new chain!"""
116
117
parent_document_retriever_qa_chain = create_qa_chain(parent_document_retriever)
parent_document_retriever_qa_chain({"query" : "What is RAG?"})["result"]
pdr_qa_ragas_dataset = create_ragas_dataset(parent_document_retriever_qa_chain, eval_dataset)
pdr_qa_ragas_dataset.to_csv("pdr_qa_ragas_dataset.csv")
pdr_qa_result = evaluate_ragas_dataset(pdr_qa_ragas_dataset)
pdr_qa_result
"""#### Ensemble Retriever

We can also combine a keyword-based BM25 retriever with a dense vector retriever and blend their rankings.
"""

!pip install -q -U rank_bm25
from langchain.retrievers import BM25Retriever, EnsembleRetriever
text_splitter = RecursiveCharacterTextSplitter()
docs = text_splitter.split_documents(base_docs)
bm25_retriever = BM25Retriever.from_documents(docs)
bm25_retriever.k = 1
from langchain.embeddings import OpenAIEmbeddings

embedding = OpenAIEmbeddings()
vectorstore = Chroma.from_documents(docs, embedding)
chroma_retriever = vectorstore.as_retriever(search_kwargs={"k": 1})
ensemble_retriever = EnsembleRetriever(retrievers=[bm25_retriever, chroma_retriever], weights=[0.5, 0.5])
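"""The ensemble blends the two rankings (LangChain uses weighted reciprocal rank fusion); with k=1 on each sub-retriever we get at most two candidate documents per query. A quick illustrative check (the query string is hypothetical):"""

ensemble_docs = ensemble_retriever.get_relevant_documents("hadm_id 112213")
for doc in ensemble_docs:
    print(doc.page_content[:80])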
ensemble_retriever_qa_chain = create_qa_chain(ensemble_retriever)
ensemble_retriever_qa_chain({"query" : "What subject id is here?"})["result"]
ensemble_qa_ragas_dataset = create_ragas_dataset(ensemble_retriever_qa_chain, eval_dataset)
ensemble_qa_ragas_dataset.to_csv("ensemble_qa_ragas_dataset.csv")
ensemble_qa_result = evaluate_ragas_dataset(ensemble_qa_ragas_dataset)
ensemble_qa_result
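"""To compare the three chains side by side, the RAGAS results can be collected into one table (a sketch, assuming each result object behaves like a mapping from metric name to aggregate score):"""

comparison_df = pd.DataFrame(
    [dict(basic_qa_result), dict(pdr_qa_result), dict(ensemble_qa_result)],
    index=["baseline", "parent_document", "ensemble"],
)
comparison_df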
"""### ROUGE Scores

Beyond RAGAS, we can also compare generated answers against reference texts using ROUGE n-gram overlap."""

from rouge_score import rouge_scorer
def calculate_rouge_scores(references, predictions):
    # ROUGE-1/2 measure unigram/bigram overlap; ROUGE-L uses the longest common subsequence.
    scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
    scores = []
    for ref, pred in zip(references, predictions):
        score = scorer.score(ref, pred)  # {metric: Score(precision, recall, fmeasure)}
        scores.append(score)
    return scores
# Example usage with dummy data (flattened rows from the admissions table; "..." marks truncated observation codes)
references = ["subject_id  hadm_id   timestamp observations 0          12   112213  2104-08-05  C0392747 C0684224 C3273238 C3812171 C0700287 C... 1          12   112213  2104-08-07  C0392747 C0684224 C3273238 C1523018 C0700287  12   112213  2104-08-08  C0181904 C1552822 C0015392 C0450429 C0150369 C..."]

predictions = ["2          12   112213  2104-08-08  C0181904 C1552822 C0015392 C0450429 C0150369 C...3          12   112213  2104-08-08  C0392747 C0684224 C3273238 C0202059 C4050465 C.."]
rouge_scores = calculate_rouge_scores(references, predictions)
for score in rouge_scores:
    print(score)
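"""Each entry in `rouge_scores` maps a metric name to a Score tuple of (precision, recall, fmeasure). A small sketch to pull just the F1 values into a table:"""

f1_table = pd.DataFrame(
    {name: [s[name].fmeasure for s in rouge_scores] for name in ['rouge1', 'rouge2', 'rougeL']}
)
f1_table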