a b/aitrika/utils/text_parser.py
1
from llama_index.core import Document
2
from llama_index.core.node_parser import SimpleNodeParser
3
from typing import List
4
from aitrika.config.config import LLMConfig
5
6
7
# Shared LLM configuration; generate_documents reads CHUNK_SIZE and
# CHUNK_OVERLAP from this instance.
config = LLMConfig()
8
9
10
def generate_documents(content: str) -> List:
    """
    Split raw text into chunks ready to be consumed by LlamaIndex.

    The text is wrapped in a single ``Document`` whose ID is the first line
    of ``content`` (surrounding whitespace stripped), then split by a
    ``SimpleNodeParser`` configured from the module-level ``config``.

    Args:
        content (str): Text

    Returns:
        List: Nodes returned by the parser for the chunks of ``content``
    """
    parser = SimpleNodeParser(
        chunk_size=config.CHUNK_SIZE, chunk_overlap=config.CHUNK_OVERLAP
    )
    # The ID field on Document is ``id_``; a plain ``id`` keyword is not a
    # declared field, so the intended ID would never be applied.
    first_line = content.partition("\n")[0].strip()
    # Fall back to the library's auto-generated ID when the first line is
    # blank, rather than assigning an empty ID.
    doc_kwargs = {"id_": first_line} if first_line else {}
    doc = Document(text=content, **doc_kwargs)
    return parser.get_nodes_from_documents([doc])