Switch to unified view

a b/src/Matcher/LangChain_structuting_queries.ipynb
1
{
2
 "cells": [
3
  {
4
   "cell_type": "code",
5
   "execution_count": 1,
6
   "metadata": {},
7
   "outputs": [],
8
   "source": [
9
    "import getpass\n",
10
    "import os\n",
11
    "from dotenv import load_dotenv\n",
12
    "\n",
13
    "load_dotenv('../.env')\n",
14
    "openai_access_key = os.getenv('OPENAI_ACCESS_KEY')"
15
   ]
16
  },
17
  {
18
   "cell_type": "code",
19
   "execution_count": null,
20
   "metadata": {},
21
   "outputs": [],
22
   "source": [
23
    "from typing import List, Literal, Union\n",
24
    "import datetime\n",
25
    "from typing import Literal, Optional, Tuple\n",
26
    "\n",
27
    "from langchain_core.pydantic_v1 import BaseModel, Field\n",
28
    "\n",
29
    "class Filter(BaseModel):\n",
30
    "    field: Literal[\"Disease\", \"Gene\", \"Protein\", \"DNAMutation\", \"ProteinMutation\", \"SNP\", \"Cell_type\", \"Drug\", \"Sign_symptom\", \"Biological_structure\", \"Date\", \n",
31
    "                   \"Duration\", \"Time\", \"Frequency\", \"Severity\", \"Lab_value\", \"Dosage\", \"Diagnostic_procedure\", \"Therapeutic_procedure\", \n",
32
    "                   \"Medication\", \"Clinical_event\", \"Outcome\", \"History\", \"Subject\", \"Family_history\", \"Detailed_description\", \"Area\"]\n",
33
    "    \n",
34
    "    comparison: Literal[\"eq\", \"lt\", \"lte\", \"gt\", \"gte\"]\n",
35
    "    value: Union[str] = Field(\n",
36
    "        ...,\n",
37
    "    description=\"If the field is Gene, write the official symbol from the NCBIGene knowledge bases\",\n",
38
    "    )\n",
39
    "\n",
40
    "\n",
41
    "class Search(BaseModel):\n",
42
    "    \"\"\"Search over a database of clinical trial texts.\"\"\"\n",
43
    "\n",
44
    "    content_search: str = Field(\n",
45
    "        ...,\n",
46
    "        description=\"Similarity search query applied to clinical trial texts.\",\n",
47
    "    )\n",
48
    "    title_search: str = Field(\n",
49
    "        ...,\n",
50
    "        description=(\n",
51
    "            \"Alternate version of the content search query to apply to titles. \"\n",
52
    "            \"Should be succinct and only include key words that could be in a clinical trial text \"\n",
53
    "            \"title.\"\n",
54
    "        ),\n",
55
    "    )\n",
56
    "    filters: List[Filter] = Field(\n",
57
    "        default_factory=list,\n",
58
    "        description=\"Filters over specific fields. Final condition is a logical conjunction of all filters.\",\n",
59
    "    )\n",
60
    "\n",
61
    "    def pretty_print(self) -> None:\n",
62
    "        for field in self.__fields__:\n",
63
    "            if getattr(self, field) is not None and getattr(self, field) != getattr(\n",
64
    "                self.__fields__[field], \"default\", None\n",
65
    "            ):\n",
66
    "                print(f\"{field}: {getattr(self, field)}\")"
67
   ]
68
  },
69
  {
70
   "cell_type": "code",
71
   "execution_count": null,
72
   "metadata": {},
73
   "outputs": [],
74
   "source": [
75
    "from langchain_core.prompts import ChatPromptTemplate\n",
76
    "from langchain_openai import ChatOpenAI\n",
77
    "\n",
78
    "system = \"\"\"You are an expert at converting user questions into database queries. \\\n",
79
    "You have access to a database of clinical trial texts. \\\n",
80
    "Given a question, return a database query optimized to retrieve the most relevant results.\n",
81
    "\n",
82
    "If there are acronyms or words you are not familiar with, do not try to rephrase them.\"\"\"\n",
83
    "prompt = ChatPromptTemplate.from_messages(\n",
84
    "    [\n",
85
    "        (\"system\", system),\n",
86
    "        (\"human\", \"{question}\"),\n",
87
    "    ]\n",
88
    ")\n",
89
    "llm = ChatOpenAI(model=\"gpt-3.5-turbo-0125\", temperature=0)\n",
90
    "structured_llm = llm.with_structured_output(Search)\n",
91
    "query_analyzer = prompt | structured_llm"
92
   ]
93
  },
94
  {
95
   "cell_type": "code",
96
   "execution_count": null,
97
   "metadata": {},
98
   "outputs": [],
99
   "source": [
100
    "query_analyzer.invoke({\"question\": \"\"}).pretty_print()"
101
   ]
102
  },
103
  {
104
   "cell_type": "code",
105
   "execution_count": null,
106
   "metadata": {},
107
   "outputs": [],
108
   "source": []
109
  }
110
 ],
111
 "metadata": {
112
  "kernelspec": {
113
   "display_name": "base",
114
   "language": "python",
115
   "name": "python3"
116
  },
117
  "language_info": {
118
   "codemirror_mode": {
119
    "name": "ipython",
120
    "version": 3
121
   },
122
   "file_extension": ".py",
123
   "mimetype": "text/x-python",
124
   "name": "python",
125
   "nbconvert_exporter": "python",
126
   "pygments_lexer": "ipython3",
127
   "version": "3.11.4"
128
  }
129
 },
130
 "nbformat": 4,
131
 "nbformat_minor": 2
132
}