TrialMatchAI / Git / [de07e6] /src/Matcher/LangChain_structuting

Models:
MarcoTheBlack/
TrialMatchAI
Downloads: 1
[de07e6]: / src / Matcher / LangChain_structuting_queries.ipynb
History
Download this file
133 lines (132 with data), 4.5 kB

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import getpass\n",
    "import os\n",
    "from dotenv import load_dotenv\n",
    "\n",
    "load_dotenv('../.env')\n",
    "openai_access_key = os.getenv('OPENAI_ACCESS_KEY')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from typing import List, Literal, Union\n",
    "import datetime\n",
    "from typing import Literal, Optional, Tuple\n",
    "\n",
    "from langchain_core.pydantic_v1 import BaseModel, Field\n",
    "\n",
    "class Filter(BaseModel):\n",
    "    field: Literal[\"Disease\", \"Gene\", \"Protein\", \"DNAMutation\", \"ProteinMutation\", \"SNP\", \"Cell_type\", \"Drug\", \"Sign_symptom\", \"Biological_structure\", \"Date\", \n",
    "                   \"Duration\", \"Time\", \"Frequency\", \"Severity\", \"Lab_value\", \"Dosage\", \"Diagnostic_procedure\", \"Therapeutic_procedure\", \n",
    "                   \"Medication\", \"Clinical_event\", \"Outcome\", \"History\", \"Subject\", \"Family_history\", \"Detailed_description\", \"Area\"]\n",
    "    \n",
    "    comparison: Literal[\"eq\", \"lt\", \"lte\", \"gt\", \"gte\"]\n",
    "    value: Union[str] = Field(\n",
    "        ...,\n",
    "    description=\"If the field is Gene, write the official symbol from the NCBIGene knowledge bases\",\n",
    "    )\n",
    "\n",
    "\n",
    "class Search(BaseModel):\n",
    "    \"\"\"Search over a database of clinical trials.\"\"\"\n",
    "\n",
    "    content_search: str = Field(\n",
    "        ...,\n",
    "        description=\"Similarity search query applied to clinical trial text.\",\n",
    "    )\n",
    "    title_search: str = Field(\n",
    "        ...,\n",
    "        description=(\n",
    "            \"Alternate version of the content search query to apply to titles. \"\n",
    "            \"Should be succinct and only include key words that could be in a clinical trial text \"\n",
    "            \"title.\"\n",
    "        ),\n",
    "    )\n",
    "    filters: List[Filter] = Field(\n",
    "        default_factory=list,\n",
    "        description=\"Filters over specific fields. Final condition is a logical conjunction of all filters.\",\n",
    "    )\n",
    "\n",
    "    def pretty_print(self) -> None:\n",
    "        for field in self.__fields__:\n",
    "            if getattr(self, field) is not None and getattr(self, field) != getattr(\n",
    "                self.__fields__[field], \"default\", None\n",
    "            ):\n",
    "                print(f\"{field}: {getattr(self, field)}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_core.prompts import ChatPromptTemplate\n",
    "from langchain_openai import ChatOpenAI\n",
    "\n",
    "system = \"\"\"You are an expert at converting user questions into database queries. \\\n",
    "You have access to a database of clinical trials. \\\n",
    "Given a question, return a database query optimized to retrieve the most relevant results.\n",
    "\n",
    "If there are acronyms or words you are not familiar with, do not try to rephrase them.\"\"\"\n",
    "prompt = ChatPromptTemplate.from_messages(\n",
    "    [\n",
    "        (\"system\", system),\n",
    "        (\"human\", \"{question}\"),\n",
    "    ]\n",
    ")\n",
    "llm = ChatOpenAI(model=\"gpt-3.5-turbo-0125\", temperature=0)\n",
    "structured_llm = llm.with_structured_output(Search)\n",
    "query_analyzer = prompt | structured_llm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "query_analyzer.invoke({\"question\": \"\"}).pretty_print()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}