In [1]:
import getpass
import os
from dotenv import load_dotenv

# Populate the process environment from the .env file one directory up,
# then read the OpenAI key (None when the variable is not defined).
load_dotenv(dotenv_path='../.env')
openai_access_key = os.environ.get('OPENAI_ACCESS_KEY')

In [None]:
from typing import List, Literal, Union
import datetime
from typing import Literal, Optional, Tuple

from langchain_core.pydantic_v1 import BaseModel, Field

# A single structured condition: <field> <comparison> <value>.
# (Kept as a comment rather than a docstring so the generated schema is unchanged.)
class Filter(BaseModel):
    # Which annotated field the condition targets; only these labels are accepted.
    field: Literal[
        "Disease",
        "Gene",
        "Protein",
        "DNAMutation",
        "ProteinMutation",
        "SNP",
        "Cell_type",
        "Drug",
        "Sign_symptom",
        "Biological_structure",
        "Date",
        "Duration",
        "Time",
        "Frequency",
        "Severity",
        "Lab_value",
        "Dosage",
        "Diagnostic_procedure",
        "Therapeutic_procedure",
        "Medication",
        "Clinical_event",
        "Outcome",
        "History",
        "Subject",
        "Family_history",
        "Detailed_description",
        "Area",
    ]

    # Operator code; presumably equal / less-than(-or-equal) / greater-than(-or-equal).
    comparison: Literal["eq", "lt", "lte", "gt", "gte"]

    # Required string operand of the comparison.
    value: str = Field(
        ...,
        description="If the field is Gene, write the official symbol from the NCBIGene knowledge bases",
    )


# NOTE(review): the class docstring and the `content_search` description still
# talk about "tutorial videos", whereas `title_search` and the Filter fields are
# clinical/biomedical. These strings presumably end up in the schema handed to
# the model, so confirm the intended wording before rewording them.
class Search(BaseModel):
    """Search over a database of tutorial videos about a software library."""

    content_search: str = Field(
        ...,
        description="Similarity search query applied to video transcripts.",
    )
    title_search: str = Field(
        ...,
        description=(
            "Alternate version of the content search query to apply to titles. "
            # Trailing space added: the adjacent literals previously joined as "texttitle."
            "Should be succinct and only include key words that could be in a clinical trial text "
            "title."
        ),
    )
    filters: List[Filter] = Field(
        default_factory=list,
        description="Filters over specific fields. Final condition is a logical conjunction of all filters.",
    )

    def pretty_print(self) -> None:
        """Print each field whose value is set and differs from its default.

        Output is one ``name: value`` line per field. Defaults are resolved with
        ``get_default()`` so that fields declared with ``default_factory``
        (``filters``) are compared against the factory's value; an empty filter
        list is therefore treated as "default" and not printed.
        """
        for name, model_field in self.__fields__.items():
            value = getattr(self, name)
            if value is None:
                continue
            # Required fields report None as their default, so any set value prints.
            if value != model_field.get_default():
                print(f"{name}: {value}")

In [None]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

# System prompt for the query-analysis step.
# NOTE(review): the prompt still describes a database of tutorial videos, while
# the Search/Filter schema in this notebook is clinical/biomedical — confirm the
# intended wording; the text is left unchanged here.
system = """You are an expert at converting user questions into database queries. \
You have access to a database of tutorial videos about a software library for building LLM-powered applications. \
Given a question, return a database query optimized to retrieve the most relevant results.

If there are acronyms or words you are not familiar with, do not try to rephrase them."""
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "{question}"),
    ]
)
# Pass the key read from OPENAI_ACCESS_KEY in the first cell explicitly; without
# it the client only looks at OPENAI_API_KEY. When the value is None the
# library's own environment lookup still applies.
llm = ChatOpenAI(
    model="gpt-3.5-turbo-0125",
    temperature=0,
    api_key=openai_access_key,
)
# Bind the output schema defined in this notebook (`Search`); the previous name
# `TutorialSearch` is not defined anywhere here and raised NameError.
structured_llm = llm.with_structured_output(Search)
# Chain: prompt template -> model constrained to the Search schema.
query_analyzer = prompt | structured_llm

In [None]:
# Run the analyzer on a question and print the non-default fields of the result.
(
    query_analyzer
    .invoke({"question": ""})
    .pretty_print()
)