a b/aitrika/llm/openai.py
1
from llama_index.llms.openai import OpenAI
2
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
3
from llama_index.core.node_parser import SimpleNodeParser
4
from llama_index.core import (
5
    VectorStoreIndex,
6
    Settings,
7
    StorageContext,
8
    load_index_from_storage,
9
    Document,
10
)
11
from llama_index.vector_stores.lancedb import LanceDBVectorStore
12
import os
13
from aitrika.llm.base_llm import BaseLLM
14
from aitrika.config.config import LLMConfig
15
16
17
class OpenAILLM(BaseLLM):
    """Answer questions over a set of documents using an OpenAI model.

    The documents are indexed into a LanceDB-backed ``VectorStoreIndex`` and
    queried through the index's default query engine. Model and chunking
    parameters are read from ``LLMConfig`` and written to the process-wide
    llama_index ``Settings`` object.
    """

    config = LLMConfig()

    # Used both as the LanceDB URI and as the llama_index persist directory.
    _VECTOR_STORE_DIR = "aitrika/rag/vectorstores/openai"

    def __init__(
        self, documents: Document, api_key: str, model_name: str = "gpt-4o-mini"
    ):
        """Store the documents and OpenAI credentials; no index is built yet.

        Args:
            documents: Documents to index. They are handed to
                ``get_nodes_from_documents`` and to ``VectorStoreIndex(nodes=...)``,
                so a sequence of ``Document`` objects is expected in practice.
            api_key: OpenAI API key. Must be non-empty.
            model_name: Name of the OpenAI model to query.

        Raises:
            ValueError: If ``api_key`` is empty or ``None``.
        """
        if not api_key:
            raise ValueError("API key is required for OpenAI.")
        self.documents = documents
        self.model_name = model_name
        self.api_key = api_key

    def _configure_settings(self):
        """Point the global llama_index ``Settings`` at this instance's models."""
        # The OpenAI wrapper takes the credential as ``api_key``; passing it
        # under any other keyword leaves the client without the supplied key.
        Settings.llm = OpenAI(model=self.model_name, api_key=self.api_key)

        # Loading the embedding model is expensive, so keep one per instance.
        embed_model = getattr(self, "_embed_model", None)
        if embed_model is None:
            embeddings_name = self.config.DEFAULT_EMBEDDINGS
            cache_dir = (
                f"aitrika/rag/embeddings/{embeddings_name.replace('/', '_')}"
            )
            embed_model = HuggingFaceEmbedding(
                model_name=embeddings_name,
                cache_folder=cache_dir,
            )
            self._embed_model = embed_model
        Settings.embed_model = embed_model

        Settings.chunk_size = self.config.CHUNK_SIZE
        Settings.chunk_overlap = self.config.CHUNK_OVERLAP
        Settings.context_window = self.config.CONTEXT_WINDOW
        Settings.num_output = self.config.NUM_OUTPUT

    def _build_index(self):
        """Create or extend the persisted vector index and store it on ``self.index``.

        If the persist directory already exists, the stored index is loaded and
        the nodes parsed from ``self.documents`` are inserted into it. Otherwise
        a new index is created from ``self.documents`` and persisted.
        """
        self._configure_settings()
        store_dir = self._VECTOR_STORE_DIR

        if os.path.exists(store_dir):
            vector_store = LanceDBVectorStore(uri=store_dir)
            storage_context = StorageContext.from_defaults(
                vector_store=vector_store, persist_dir=store_dir
            )
            index = load_index_from_storage(storage_context=storage_context)
            new_nodes = SimpleNodeParser().get_nodes_from_documents(self.documents)
            index.insert_nodes(new_nodes)
            # NOTE(review): the reload after the insert is kept from the
            # original implementation; it looks redundant -- confirm before
            # removing.
            index = load_index_from_storage(storage_context=storage_context)
        else:
            vector_store = LanceDBVectorStore(uri=store_dir)
            storage_context = StorageContext.from_defaults(vector_store=vector_store)
            # NOTE(review): unlike the branch above, the documents are passed
            # as nodes without going through a node parser -- confirm that
            # this is intended.
            index = VectorStoreIndex(
                nodes=self.documents, storage_context=storage_context
            )
            index.storage_context.persist(persist_dir=store_dir)
        self.index = index

    def query(self, query: str):
        """Run ``query`` against the index and return the stripped answer text.

        The index is built on the first call only. Rebuilding it on every call
        would insert ``self.documents`` into the persisted store again each
        time a question is asked.
        """
        if getattr(self, "index", None) is None:
            self._build_index()
        else:
            # Settings is process-wide and may have been changed since the
            # index was built, so re-apply this instance's configuration.
            self._configure_settings()
        query_engine = self.index.as_query_engine()
        response = query_engine.query(query)
        return str(response).strip()