{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import getpass\n",
"import os\n",
"from dotenv import load_dotenv\n",
"\n",
"load_dotenv('../.env')\n",
"openai_access_key = os.getenv('OPENAI_ACCESS_KEY')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from typing import List, Literal, Union\n",
"import datetime\n",
"from typing import Literal, Optional, Tuple\n",
"\n",
"from langchain_core.pydantic_v1 import BaseModel, Field\n",
"\n",
"class Filter(BaseModel):\n",
" field: Literal[\"Disease\", \"Gene\", \"Protein\", \"DNAMutation\", \"ProteinMutation\", \"SNP\", \"Cell_type\", \"Drug\", \"Sign_symptom\", \"Biological_structure\", \"Date\", \n",
" \"Duration\", \"Time\", \"Frequency\", \"Severity\", \"Lab_value\", \"Dosage\", \"Diagnostic_procedure\", \"Therapeutic_procedure\", \n",
" \"Medication\", \"Clinical_event\", \"Outcome\", \"History\", \"Subject\", \"Family_history\", \"Detailed_description\", \"Area\"]\n",
" \n",
" comparison: Literal[\"eq\", \"lt\", \"lte\", \"gt\", \"gte\"]\n",
" value: Union[str] = Field(\n",
" ...,\n",
" description=\"If the field is Gene, write the official symbol from the NCBIGene knowledge bases\",\n",
" )\n",
"\n",
"\n",
"class Search(BaseModel):\n",
" \"\"\"Search over a database of tutorial videos about a software library.\"\"\"\n",
"\n",
" content_search: str = Field(\n",
" ...,\n",
" description=\"Similarity search query applied to video transcripts.\",\n",
" )\n",
" title_search: str = Field(\n",
" ...,\n",
" description=(\n",
" \"Alternate version of the content search query to apply to titles. \"\n",
" \"Should be succinct and only include key words that could be in a clinical trial text\"\n",
" \"title.\"\n",
" ),\n",
" )\n",
" filters: List[Filter] = Field(\n",
" default_factory=list,\n",
" description=\"Filters over specific fields. Final condition is a logical conjunction of all filters.\",\n",
" )\n",
"\n",
" def pretty_print(self) -> None:\n",
" for field in self.__fields__:\n",
" if getattr(self, field) is not None and getattr(self, field) != getattr(\n",
" self.__fields__[field], \"default\", None\n",
" ):\n",
" print(f\"{field}: {getattr(self, field)}\")"
]
},
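{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# A minimal, hand-built example (not model output) to show the shape of the\n",
"# structured object the LLM is asked to produce. The field values below are\n",
"# illustrative assumptions only.\n",
"example_query = Search(\n",
"    content_search=\"BRCA1 mutations in breast cancer trials\",\n",
"    title_search=\"BRCA1 breast cancer\",\n",
"    filters=[\n",
"        Filter(field=\"Gene\", comparison=\"eq\", value=\"BRCA1\"),\n",
"        Filter(field=\"Disease\", comparison=\"eq\", value=\"breast cancer\"),\n",
"    ],\n",
")\n",
"example_query.pretty_print()"
]
},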
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain_core.prompts import ChatPromptTemplate\n",
"from langchain_openai import ChatOpenAI\n",
"\n",
"system = \"\"\"You are an expert at converting user questions into database queries. \\\n",
"You have access to a database of tutorial videos about a software library for building LLM-powered applications. \\\n",
"Given a question, return a database query optimized to retrieve the most relevant results.\n",
"\n",
"If there are acronyms or words you are not familiar with, do not try to rephrase them.\"\"\"\n",
"prompt = ChatPromptTemplate.from_messages(\n",
" [\n",
" (\"system\", system),\n",
" (\"human\", \"{question}\"),\n",
" ]\n",
")\n",
"llm = ChatOpenAI(model=\"gpt-3.5-turbo-0125\", temperature=0)\n",
"structured_llm = llm.with_structured_output(TutorialSearch)\n",
"query_analyzer = prompt | structured_llm"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"query_analyzer.invoke({\"question\": \"\"}).pretty_print()"
]
},
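{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative sketch: the returned Search object can be inspected directly,\n",
"# e.g. to pass its filters on to a downstream retriever. The question and the\n",
"# exact filters the model produces are assumptions, not guaranteed output.\n",
"result = query_analyzer.invoke(\n",
"    {\"question\": \"Trials of trastuzumab for HER2-positive breast cancer since 2020\"}\n",
")\n",
"result.pretty_print()\n",
"result.filters"
]
},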
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}