TrialMatchAI / Git / Diff of /src/Matcher/LangChain_structuting

Models:

MarcoTheBlack/

TrialMatchAI

Downloads: 1

Diff of /src/Matcher/LangChain_structuting_queries.ipynb [000000] .. [f87529]

Switch to unified view

 b/src/Matcher/LangChain_structuting_queries.ipynb
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import getpass\n",
+    "import os\n",
+    "from dotenv import load_dotenv\n",
+    "\n",
+    "load_dotenv('../.env')\n",
+    "openai_access_key = os.getenv('OPENAI_ACCESS_KEY')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from typing import List, Literal, Union\n",
+    "import datetime\n",
+    "from typing import Literal, Optional, Tuple\n",
+    "\n",
+    "from langchain_core.pydantic_v1 import BaseModel, Field\n",
+    "\n",
+    "class Filter(BaseModel):\n",
+    "    field: Literal[\"Disease\", \"Gene\", \"Protein\", \"DNAMutation\", \"ProteinMutation\", \"SNP\", \"Cell_type\", \"Drug\", \"Sign_symptom\", \"Biological_structure\", \"Date\", \n",
+    "                   \"Duration\", \"Time\", \"Frequency\", \"Severity\", \"Lab_value\", \"Dosage\", \"Diagnostic_procedure\", \"Therapeutic_procedure\", \n",
+    "                   \"Medication\", \"Clinical_event\", \"Outcome\", \"History\", \"Subject\", \"Family_history\", \"Detailed_description\", \"Area\"]\n",
+    "    \n",
+    "    comparison: Literal[\"eq\", \"lt\", \"lte\", \"gt\", \"gte\"]\n",
+    "    value: Union[str] = Field(\n",
+    "        ...,\n",
+    "    description=\"If the field is Gene, write the official symbol from the NCBIGene knowledge bases\",\n",
+    "    )\n",
+    "\n",
+    "\n",
+    "class Search(BaseModel):\n",
+    "    \"\"\"Search over a database of clinical trials.\"\"\"\n",
+    "\n",
+    "    content_search: str = Field(\n",
+    "        ...,\n",
+    "        description=\"Similarity search query applied to clinical trial text.\",\n",
+    "    )\n",
+    "    title_search: str = Field(\n",
+    "        ...,\n",
+    "        description=(\n",
+    "            \"Alternate version of the content search query to apply to titles. \"\n",
+    "            \"Should be succinct and only include key words that could be in a clinical trial text \"\n",
+    "            \"title.\"\n",
+    "        ),\n",
+    "    )\n",
+    "    filters: List[Filter] = Field(\n",
+    "        default_factory=list,\n",
+    "        description=\"Filters over specific fields. Final condition is a logical conjunction of all filters.\",\n",
+    "    )\n",
+    "\n",
+    "    def pretty_print(self) -> None:\n",
+    "        for field in self.__fields__:\n",
+    "            if getattr(self, field) is not None and getattr(self, field) != getattr(\n",
+    "                self.__fields__[field], \"default\", None\n",
+    "            ):\n",
+    "                print(f\"{field}: {getattr(self, field)}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain_core.prompts import ChatPromptTemplate\n",
+    "from langchain_openai import ChatOpenAI\n",
+    "\n",
+    "system = \"\"\"You are an expert at converting user questions into database queries. \\\n",
+    "You have access to a database of clinical trials. \\\n",
+    "Given a question, return a database query optimized to retrieve the most relevant results.\n",
+    "\n",
+    "If there are acronyms or words you are not familiar with, do not try to rephrase them.\"\"\"\n",
+    "prompt = ChatPromptTemplate.from_messages(\n",
+    "    [\n",
+    "        (\"system\", system),\n",
+    "        (\"human\", \"{question}\"),\n",
+    "    ]\n",
+    ")\n",
+    "llm = ChatOpenAI(model=\"gpt-3.5-turbo-0125\", temperature=0)\n",
+    "structured_llm = llm.with_structured_output(Search)\n",
+    "query_analyzer = prompt | structured_llm"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "query_analyzer.invoke({\"question\": \"\"}).pretty_print()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "base",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}