
a b/Code/LangChain/RAPTOR/RAPTOR, kkawchak.ipynb
1
{
2
  "cells": [
3
    {
4
      "cell_type": "code",
5
      "execution_count": 1,
6
      "id": "3058e9ca-07c3-4eef-b98c-bc2f2dbb9cc6",
7
      "metadata": {
8
        "colab": {
9
          "base_uri": "https://localhost:8080/",
10
          "height": 0
11
        },
12
        "id": "3058e9ca-07c3-4eef-b98c-bc2f2dbb9cc6",
13
        "outputId": "b9260a9b-9a3b-4fe8-919d-a84f11852ce7"
14
      },
15
      "outputs": [
16
        {
17
          "output_type": "stream",
18
          "name": "stdout",
19
          "text": [
20
            "Collecting langchain\n",
21
            "  Downloading langchain-0.1.20-py3-none-any.whl (1.0 MB)\n",
22
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.0/1.0 MB\u001b[0m \u001b[31m9.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
23
            "\u001b[?25hCollecting umap-learn\n",
24
            "  Downloading umap_learn-0.5.6-py3-none-any.whl (85 kB)\n",
25
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m85.7/85.7 kB\u001b[0m \u001b[31m11.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
26
            "\u001b[?25hRequirement already satisfied: scikit-learn in /usr/local/lib/python3.10/dist-packages (1.2.2)\n",
27
            "Collecting scikit-learn\n",
28
            "  Downloading scikit_learn-1.4.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.1 MB)\n",
29
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.1/12.1 MB\u001b[0m \u001b[31m40.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
30
            "\u001b[?25hCollecting langchain_community\n",
31
            "  Downloading langchain_community-0.0.38-py3-none-any.whl (2.0 MB)\n",
32
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m55.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
33
            "\u001b[?25hCollecting tiktoken\n",
34
            "  Downloading tiktoken-0.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.1 MB)\n",
35
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m35.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
36
            "\u001b[?25hCollecting langchain-openai\n",
37
            "  Downloading langchain_openai-0.1.7-py3-none-any.whl (34 kB)\n",
38
            "Collecting langchainhub\n",
39
            "  Downloading langchainhub-0.1.15-py3-none-any.whl (4.6 kB)\n",
40
            "Collecting chromadb\n",
41
            "  Downloading chromadb-0.5.0-py3-none-any.whl (526 kB)\n",
42
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m526.8/526.8 kB\u001b[0m \u001b[31m44.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
43
            "\u001b[?25hCollecting langchain-anthropic\n",
44
            "  Downloading langchain_anthropic-0.1.12-py3-none-any.whl (16 kB)\n",
45
            "Requirement already satisfied: PyYAML>=5.3 in /usr/local/lib/python3.10/dist-packages (from langchain) (6.0.1)\n",
46
            "Requirement already satisfied: SQLAlchemy<3,>=1.4 in /usr/local/lib/python3.10/dist-packages (from langchain) (2.0.30)\n",
47
            "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /usr/local/lib/python3.10/dist-packages (from langchain) (3.9.5)\n",
48
            "Requirement already satisfied: async-timeout<5.0.0,>=4.0.0 in /usr/local/lib/python3.10/dist-packages (from langchain) (4.0.3)\n",
49
            "Collecting dataclasses-json<0.7,>=0.5.7 (from langchain)\n",
50
            "  Downloading dataclasses_json-0.6.6-py3-none-any.whl (28 kB)\n",
51
            "Collecting langchain-core<0.2.0,>=0.1.52 (from langchain)\n",
52
            "  Downloading langchain_core-0.1.52-py3-none-any.whl (302 kB)\n",
53
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m302.9/302.9 kB\u001b[0m \u001b[31m31.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
54
            "\u001b[?25hCollecting langchain-text-splitters<0.1,>=0.0.1 (from langchain)\n",
55
            "  Downloading langchain_text_splitters-0.0.1-py3-none-any.whl (21 kB)\n",
56
            "Collecting langsmith<0.2.0,>=0.1.17 (from langchain)\n",
57
            "  Downloading langsmith-0.1.58-py3-none-any.whl (121 kB)\n",
58
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m121.1/121.1 kB\u001b[0m \u001b[31m13.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
59
            "\u001b[?25hRequirement already satisfied: numpy<2,>=1 in /usr/local/lib/python3.10/dist-packages (from langchain) (1.25.2)\n",
60
            "Requirement already satisfied: pydantic<3,>=1 in /usr/local/lib/python3.10/dist-packages (from langchain) (2.7.1)\n",
61
            "Requirement already satisfied: requests<3,>=2 in /usr/local/lib/python3.10/dist-packages (from langchain) (2.31.0)\n",
62
            "Requirement already satisfied: tenacity<9.0.0,>=8.1.0 in /usr/local/lib/python3.10/dist-packages (from langchain) (8.3.0)\n",
63
            "Requirement already satisfied: scipy>=1.3.1 in /usr/local/lib/python3.10/dist-packages (from umap-learn) (1.11.4)\n",
64
            "Requirement already satisfied: numba>=0.51.2 in /usr/local/lib/python3.10/dist-packages (from umap-learn) (0.58.1)\n",
65
            "Collecting pynndescent>=0.5 (from umap-learn)\n",
66
            "  Downloading pynndescent-0.5.12-py3-none-any.whl (56 kB)\n",
67
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.8/56.8 kB\u001b[0m \u001b[31m6.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
68
            "\u001b[?25hRequirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from umap-learn) (4.66.4)\n",
69
            "Requirement already satisfied: joblib>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn) (1.4.2)\n",
70
            "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn) (3.5.0)\n",
71
            "Requirement already satisfied: regex>=2022.1.18 in /usr/local/lib/python3.10/dist-packages (from tiktoken) (2023.12.25)\n",
72
            "Collecting openai<2.0.0,>=1.24.0 (from langchain-openai)\n",
73
            "  Downloading openai-1.30.1-py3-none-any.whl (320 kB)\n",
74
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m320.6/320.6 kB\u001b[0m \u001b[31m32.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
75
            "\u001b[?25hCollecting types-requests<3.0.0.0,>=2.31.0.2 (from langchainhub)\n",
76
            "  Downloading types_requests-2.31.0.20240406-py3-none-any.whl (15 kB)\n",
77
            "Requirement already satisfied: build>=1.0.3 in /usr/local/lib/python3.10/dist-packages (from chromadb) (1.2.1)\n",
78
            "Collecting chroma-hnswlib==0.7.3 (from chromadb)\n",
79
            "  Downloading chroma_hnswlib-0.7.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.4 MB)\n",
80
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.4/2.4 MB\u001b[0m \u001b[31m40.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
81
            "\u001b[?25hCollecting fastapi>=0.95.2 (from chromadb)\n",
82
            "  Downloading fastapi-0.111.0-py3-none-any.whl (91 kB)\n",
83
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m92.0/92.0 kB\u001b[0m \u001b[31m7.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
84
            "\u001b[?25hCollecting uvicorn[standard]>=0.18.3 (from chromadb)\n",
85
            "  Downloading uvicorn-0.29.0-py3-none-any.whl (60 kB)\n",
86
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m60.8/60.8 kB\u001b[0m \u001b[31m6.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
87
            "\u001b[?25hCollecting posthog>=2.4.0 (from chromadb)\n",
88
            "  Downloading posthog-3.5.0-py2.py3-none-any.whl (41 kB)\n",
89
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m41.3/41.3 kB\u001b[0m \u001b[31m3.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
90
            "\u001b[?25hRequirement already satisfied: typing-extensions>=4.5.0 in /usr/local/lib/python3.10/dist-packages (from chromadb) (4.11.0)\n",
91
            "Collecting onnxruntime>=1.14.1 (from chromadb)\n",
92
            "  Downloading onnxruntime-1.17.3-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (6.8 MB)\n",
93
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.8/6.8 MB\u001b[0m \u001b[31m88.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
94
            "\u001b[?25hCollecting opentelemetry-api>=1.2.0 (from chromadb)\n",
95
            "  Downloading opentelemetry_api-1.24.0-py3-none-any.whl (60 kB)\n",
96
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m60.1/60.1 kB\u001b[0m \u001b[31m6.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
97
            "\u001b[?25hCollecting opentelemetry-exporter-otlp-proto-grpc>=1.2.0 (from chromadb)\n",
98
            "  Downloading opentelemetry_exporter_otlp_proto_grpc-1.24.0-py3-none-any.whl (18 kB)\n",
99
            "Collecting opentelemetry-instrumentation-fastapi>=0.41b0 (from chromadb)\n",
100
            "  Downloading opentelemetry_instrumentation_fastapi-0.45b0-py3-none-any.whl (11 kB)\n",
101
            "Collecting opentelemetry-sdk>=1.2.0 (from chromadb)\n",
102
            "  Downloading opentelemetry_sdk-1.24.0-py3-none-any.whl (106 kB)\n",
103
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m106.1/106.1 kB\u001b[0m \u001b[31m14.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
104
            "\u001b[?25hRequirement already satisfied: tokenizers>=0.13.2 in /usr/local/lib/python3.10/dist-packages (from chromadb) (0.19.1)\n",
105
            "Collecting pypika>=0.48.9 (from chromadb)\n",
106
            "  Downloading PyPika-0.48.9.tar.gz (67 kB)\n",
107
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m67.3/67.3 kB\u001b[0m \u001b[31m8.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
108
            "\u001b[?25h  Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
109
            "  Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
110
            "  Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
111
            "Collecting overrides>=7.3.1 (from chromadb)\n",
112
            "  Downloading overrides-7.7.0-py3-none-any.whl (17 kB)\n",
113
            "Requirement already satisfied: importlib-resources in /usr/local/lib/python3.10/dist-packages (from chromadb) (6.4.0)\n",
114
            "Requirement already satisfied: grpcio>=1.58.0 in /usr/local/lib/python3.10/dist-packages (from chromadb) (1.63.0)\n",
115
            "Collecting bcrypt>=4.0.1 (from chromadb)\n",
116
            "  Downloading bcrypt-4.1.3-cp39-abi3-manylinux_2_28_x86_64.whl (283 kB)\n",
117
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m283.7/283.7 kB\u001b[0m \u001b[31m23.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
118
            "\u001b[?25hRequirement already satisfied: typer>=0.9.0 in /usr/local/lib/python3.10/dist-packages (from chromadb) (0.9.4)\n",
119
            "Collecting kubernetes>=28.1.0 (from chromadb)\n",
120
            "  Downloading kubernetes-29.0.0-py2.py3-none-any.whl (1.6 MB)\n",
121
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m54.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
122
            "\u001b[?25hCollecting mmh3>=4.0.1 (from chromadb)\n",
123
            "  Downloading mmh3-4.1.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (67 kB)\n",
124
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m67.6/67.6 kB\u001b[0m \u001b[31m7.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
125
            "\u001b[?25hCollecting orjson>=3.9.12 (from chromadb)\n",
126
            "  Downloading orjson-3.10.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (142 kB)\n",
127
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m142.5/142.5 kB\u001b[0m \u001b[31m8.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
128
            "\u001b[?25hCollecting anthropic<1,>=0.23.0 (from langchain-anthropic)\n",
129
            "  Downloading anthropic-0.25.9-py3-none-any.whl (871 kB)\n",
130
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m871.1/871.1 kB\u001b[0m \u001b[31m59.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
131
            "\u001b[?25hRequirement already satisfied: defusedxml<0.8.0,>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from langchain-anthropic) (0.7.1)\n",
132
            "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.3.1)\n",
133
            "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (23.2.0)\n",
134
            "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.4.1)\n",
135
            "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (6.0.5)\n",
136
            "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.9.4)\n",
137
            "Requirement already satisfied: anyio<5,>=3.5.0 in /usr/local/lib/python3.10/dist-packages (from anthropic<1,>=0.23.0->langchain-anthropic) (3.7.1)\n",
138
            "Requirement already satisfied: distro<2,>=1.7.0 in /usr/lib/python3/dist-packages (from anthropic<1,>=0.23.0->langchain-anthropic) (1.7.0)\n",
139
            "Collecting httpx<1,>=0.23.0 (from anthropic<1,>=0.23.0->langchain-anthropic)\n",
140
            "  Downloading httpx-0.27.0-py3-none-any.whl (75 kB)\n",
141
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.6/75.6 kB\u001b[0m \u001b[31m2.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
142
            "\u001b[?25hRequirement already satisfied: sniffio in /usr/local/lib/python3.10/dist-packages (from anthropic<1,>=0.23.0->langchain-anthropic) (1.3.1)\n",
143
            "Requirement already satisfied: packaging>=19.1 in /usr/local/lib/python3.10/dist-packages (from build>=1.0.3->chromadb) (24.0)\n",
144
            "Requirement already satisfied: pyproject_hooks in /usr/local/lib/python3.10/dist-packages (from build>=1.0.3->chromadb) (1.1.0)\n",
145
            "Requirement already satisfied: tomli>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from build>=1.0.3->chromadb) (2.0.1)\n",
146
            "Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain)\n",
147
            "  Downloading marshmallow-3.21.2-py3-none-any.whl (49 kB)\n",
148
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.3/49.3 kB\u001b[0m \u001b[31m3.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
149
            "\u001b[?25hCollecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain)\n",
150
            "  Downloading typing_inspect-0.9.0-py3-none-any.whl (8.8 kB)\n",
151
            "Collecting starlette<0.38.0,>=0.37.2 (from fastapi>=0.95.2->chromadb)\n",
152
            "  Downloading starlette-0.37.2-py3-none-any.whl (71 kB)\n",
153
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m71.9/71.9 kB\u001b[0m \u001b[31m5.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
154
            "\u001b[?25hCollecting fastapi-cli>=0.0.2 (from fastapi>=0.95.2->chromadb)\n",
155
            "  Downloading fastapi_cli-0.0.3-py3-none-any.whl (9.2 kB)\n",
156
            "Requirement already satisfied: jinja2>=2.11.2 in /usr/local/lib/python3.10/dist-packages (from fastapi>=0.95.2->chromadb) (3.1.4)\n",
157
            "Collecting python-multipart>=0.0.7 (from fastapi>=0.95.2->chromadb)\n",
158
            "  Downloading python_multipart-0.0.9-py3-none-any.whl (22 kB)\n",
159
            "Collecting ujson!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0,>=4.0.1 (from fastapi>=0.95.2->chromadb)\n",
160
            "  Downloading ujson-5.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (53 kB)\n",
161
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.6/53.6 kB\u001b[0m \u001b[31m4.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
162
            "\u001b[?25hCollecting email_validator>=2.0.0 (from fastapi>=0.95.2->chromadb)\n",
163
            "  Downloading email_validator-2.1.1-py3-none-any.whl (30 kB)\n",
164
            "Requirement already satisfied: certifi>=14.05.14 in /usr/local/lib/python3.10/dist-packages (from kubernetes>=28.1.0->chromadb) (2024.2.2)\n",
165
            "Requirement already satisfied: six>=1.9.0 in /usr/local/lib/python3.10/dist-packages (from kubernetes>=28.1.0->chromadb) (1.16.0)\n",
166
            "Requirement already satisfied: python-dateutil>=2.5.3 in /usr/local/lib/python3.10/dist-packages (from kubernetes>=28.1.0->chromadb) (2.8.2)\n",
167
            "Requirement already satisfied: google-auth>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from kubernetes>=28.1.0->chromadb) (2.27.0)\n",
168
            "Requirement already satisfied: websocket-client!=0.40.0,!=0.41.*,!=0.42.*,>=0.32.0 in /usr/local/lib/python3.10/dist-packages (from kubernetes>=28.1.0->chromadb) (1.8.0)\n",
169
            "Requirement already satisfied: requests-oauthlib in /usr/local/lib/python3.10/dist-packages (from kubernetes>=28.1.0->chromadb) (1.3.1)\n",
170
            "Requirement already satisfied: oauthlib>=3.2.2 in /usr/local/lib/python3.10/dist-packages (from kubernetes>=28.1.0->chromadb) (3.2.2)\n",
171
            "Requirement already satisfied: urllib3>=1.24.2 in /usr/local/lib/python3.10/dist-packages (from kubernetes>=28.1.0->chromadb) (2.0.7)\n",
172
            "Collecting jsonpatch<2.0,>=1.33 (from langchain-core<0.2.0,>=0.1.52->langchain)\n",
173
            "  Downloading jsonpatch-1.33-py2.py3-none-any.whl (12 kB)\n",
174
            "Collecting packaging>=19.1 (from build>=1.0.3->chromadb)\n",
175
            "  Downloading packaging-23.2-py3-none-any.whl (53 kB)\n",
176
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.0/53.0 kB\u001b[0m \u001b[31m6.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
177
            "\u001b[?25hRequirement already satisfied: llvmlite<0.42,>=0.41.0dev0 in /usr/local/lib/python3.10/dist-packages (from numba>=0.51.2->umap-learn) (0.41.1)\n",
178
            "Collecting coloredlogs (from onnxruntime>=1.14.1->chromadb)\n",
179
            "  Downloading coloredlogs-15.0.1-py2.py3-none-any.whl (46 kB)\n",
180
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m5.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
181
            "\u001b[?25hRequirement already satisfied: flatbuffers in /usr/local/lib/python3.10/dist-packages (from onnxruntime>=1.14.1->chromadb) (24.3.25)\n",
182
            "Requirement already satisfied: protobuf in /usr/local/lib/python3.10/dist-packages (from onnxruntime>=1.14.1->chromadb) (3.20.3)\n",
183
            "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from onnxruntime>=1.14.1->chromadb) (1.12)\n",
184
            "Collecting deprecated>=1.2.6 (from opentelemetry-api>=1.2.0->chromadb)\n",
185
            "  Downloading Deprecated-1.2.14-py2.py3-none-any.whl (9.6 kB)\n",
186
            "Collecting importlib-metadata<=7.0,>=6.0 (from opentelemetry-api>=1.2.0->chromadb)\n",
187
            "  Downloading importlib_metadata-7.0.0-py3-none-any.whl (23 kB)\n",
188
            "Requirement already satisfied: googleapis-common-protos~=1.52 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb) (1.63.0)\n",
189
            "Collecting opentelemetry-exporter-otlp-proto-common==1.24.0 (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb)\n",
190
            "  Downloading opentelemetry_exporter_otlp_proto_common-1.24.0-py3-none-any.whl (17 kB)\n",
191
            "Collecting opentelemetry-proto==1.24.0 (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb)\n",
192
            "  Downloading opentelemetry_proto-1.24.0-py3-none-any.whl (50 kB)\n",
193
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m50.8/50.8 kB\u001b[0m \u001b[31m4.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
194
            "\u001b[?25hCollecting opentelemetry-instrumentation-asgi==0.45b0 (from opentelemetry-instrumentation-fastapi>=0.41b0->chromadb)\n",
195
            "  Downloading opentelemetry_instrumentation_asgi-0.45b0-py3-none-any.whl (14 kB)\n",
196
            "Collecting opentelemetry-instrumentation==0.45b0 (from opentelemetry-instrumentation-fastapi>=0.41b0->chromadb)\n",
197
            "  Downloading opentelemetry_instrumentation-0.45b0-py3-none-any.whl (28 kB)\n",
198
            "Collecting opentelemetry-semantic-conventions==0.45b0 (from opentelemetry-instrumentation-fastapi>=0.41b0->chromadb)\n",
199
            "  Downloading opentelemetry_semantic_conventions-0.45b0-py3-none-any.whl (36 kB)\n",
200
            "Collecting opentelemetry-util-http==0.45b0 (from opentelemetry-instrumentation-fastapi>=0.41b0->chromadb)\n",
201
            "  Downloading opentelemetry_util_http-0.45b0-py3-none-any.whl (6.9 kB)\n",
202
            "Requirement already satisfied: setuptools>=16.0 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-instrumentation==0.45b0->opentelemetry-instrumentation-fastapi>=0.41b0->chromadb) (67.7.2)\n",
203
            "Requirement already satisfied: wrapt<2.0.0,>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-instrumentation==0.45b0->opentelemetry-instrumentation-fastapi>=0.41b0->chromadb) (1.14.1)\n",
204
            "Collecting asgiref~=3.0 (from opentelemetry-instrumentation-asgi==0.45b0->opentelemetry-instrumentation-fastapi>=0.41b0->chromadb)\n",
205
            "  Downloading asgiref-3.8.1-py3-none-any.whl (23 kB)\n",
206
            "Collecting monotonic>=1.5 (from posthog>=2.4.0->chromadb)\n",
207
            "  Downloading monotonic-1.6-py2.py3-none-any.whl (8.2 kB)\n",
208
            "Collecting backoff>=1.10.0 (from posthog>=2.4.0->chromadb)\n",
209
            "  Downloading backoff-2.2.1-py3-none-any.whl (15 kB)\n",
210
            "Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1->langchain) (0.6.0)\n",
211
            "Requirement already satisfied: pydantic-core==2.18.2 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1->langchain) (2.18.2)\n",
212
            "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain) (3.3.2)\n",
213
            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain) (3.7)\n",
214
            "Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.10/dist-packages (from SQLAlchemy<3,>=1.4->langchain) (3.0.3)\n",
215
            "Requirement already satisfied: huggingface-hub<1.0,>=0.16.4 in /usr/local/lib/python3.10/dist-packages (from tokenizers>=0.13.2->chromadb) (0.20.3)\n",
216
            "Requirement already satisfied: click<9.0.0,>=7.1.1 in /usr/local/lib/python3.10/dist-packages (from typer>=0.9.0->chromadb) (8.1.7)\n",
217
            "Collecting h11>=0.8 (from uvicorn[standard]>=0.18.3->chromadb)\n",
218
            "  Downloading h11-0.14.0-py3-none-any.whl (58 kB)\n",
219
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m6.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
220
            "\u001b[?25hCollecting httptools>=0.5.0 (from uvicorn[standard]>=0.18.3->chromadb)\n",
221
            "  Downloading httptools-0.6.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (341 kB)\n",
222
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m341.4/341.4 kB\u001b[0m \u001b[31m32.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
223
            "\u001b[?25hCollecting python-dotenv>=0.13 (from uvicorn[standard]>=0.18.3->chromadb)\n",
224
            "  Downloading python_dotenv-1.0.1-py3-none-any.whl (19 kB)\n",
225
            "Collecting uvloop!=0.15.0,!=0.15.1,>=0.14.0 (from uvicorn[standard]>=0.18.3->chromadb)\n",
226
            "  Downloading uvloop-0.19.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.4 MB)\n",
227
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.4/3.4 MB\u001b[0m \u001b[31m94.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
228
            "\u001b[?25hCollecting watchfiles>=0.13 (from uvicorn[standard]>=0.18.3->chromadb)\n",
229
            "  Downloading watchfiles-0.21.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n",
230
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m75.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
231
            "\u001b[?25hCollecting websockets>=10.4 (from uvicorn[standard]>=0.18.3->chromadb)\n",
232
            "  Downloading websockets-12.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (130 kB)\n",
233
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m130.2/130.2 kB\u001b[0m \u001b[31m19.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
234
            "\u001b[?25hRequirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio<5,>=3.5.0->anthropic<1,>=0.23.0->langchain-anthropic) (1.2.1)\n",
235
            "Collecting dnspython>=2.0.0 (from email_validator>=2.0.0->fastapi>=0.95.2->chromadb)\n",
236
            "  Downloading dnspython-2.6.1-py3-none-any.whl (307 kB)\n",
237
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m307.7/307.7 kB\u001b[0m \u001b[31m36.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
238
            "\u001b[?25hCollecting typer>=0.9.0 (from chromadb)\n",
239
            "  Downloading typer-0.12.3-py3-none-any.whl (47 kB)\n",
240
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m47.2/47.2 kB\u001b[0m \u001b[31m7.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
241
            "\u001b[?25hCollecting shellingham>=1.3.0 (from typer>=0.9.0->chromadb)\n",
242
            "  Downloading shellingham-1.5.4-py2.py3-none-any.whl (9.8 kB)\n",
243
            "Requirement already satisfied: rich>=10.11.0 in /usr/local/lib/python3.10/dist-packages (from typer>=0.9.0->chromadb) (13.7.1)\n",
244
            "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from google-auth>=1.0.1->kubernetes>=28.1.0->chromadb) (5.3.3)\n",
245
            "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from google-auth>=1.0.1->kubernetes>=28.1.0->chromadb) (0.4.0)\n",
246
            "Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.10/dist-packages (from google-auth>=1.0.1->kubernetes>=28.1.0->chromadb) (4.9)\n",
247
            "Collecting httpcore==1.* (from httpx<1,>=0.23.0->anthropic<1,>=0.23.0->langchain-anthropic)\n",
248
            "  Downloading httpcore-1.0.5-py3-none-any.whl (77 kB)\n",
249
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m77.9/77.9 kB\u001b[0m \u001b[31m11.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
250
            "\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.16.4->tokenizers>=0.13.2->chromadb) (3.14.0)\n",
251
            "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.16.4->tokenizers>=0.13.2->chromadb) (2023.6.0)\n",
252
            "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.10/dist-packages (from importlib-metadata<=7.0,>=6.0->opentelemetry-api>=1.2.0->chromadb) (3.18.1)\n",
253
            "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2>=2.11.2->fastapi>=0.95.2->chromadb) (2.1.5)\n",
254
            "Collecting jsonpointer>=1.9 (from jsonpatch<2.0,>=1.33->langchain-core<0.2.0,>=0.1.52->langchain)\n",
255
            "  Downloading jsonpointer-2.4-py2.py3-none-any.whl (7.8 kB)\n",
256
            "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich>=10.11.0->typer>=0.9.0->chromadb) (3.0.0)\n",
257
            "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich>=10.11.0->typer>=0.9.0->chromadb) (2.16.1)\n",
258
            "Collecting mypy-extensions>=0.3.0 (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain)\n",
259
            "  Downloading mypy_extensions-1.0.0-py3-none-any.whl (4.7 kB)\n",
260
            "Collecting humanfriendly>=9.1 (from coloredlogs->onnxruntime>=1.14.1->chromadb)\n",
261
            "  Downloading humanfriendly-10.0-py2.py3-none-any.whl (86 kB)\n",
262
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m13.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
263
            "\u001b[?25hRequirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->onnxruntime>=1.14.1->chromadb) (1.3.0)\n",
264
            "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich>=10.11.0->typer>=0.9.0->chromadb) (0.1.2)\n",
265
            "Requirement already satisfied: pyasn1<0.7.0,>=0.4.6 in /usr/local/lib/python3.10/dist-packages (from pyasn1-modules>=0.2.1->google-auth>=1.0.1->kubernetes>=28.1.0->chromadb) (0.6.0)\n",
266
            "Building wheels for collected packages: pypika\n",
267
            "  Building wheel for pypika (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
268
            "  Created wheel for pypika: filename=PyPika-0.48.9-py2.py3-none-any.whl size=53724 sha256=156d16846b70fc491a62f6d35652354150249f0e7a28e62688646d92f83dbca3\n",
269
            "  Stored in directory: /root/.cache/pip/wheels/e1/26/51/d0bffb3d2fd82256676d7ad3003faea3bd6dddc9577af665f4\n",
270
            "Successfully built pypika\n",
271
            "Installing collected packages: pypika, monotonic, mmh3, websockets, uvloop, ujson, types-requests, shellingham, python-multipart, python-dotenv, packaging, overrides, orjson, opentelemetry-util-http, opentelemetry-semantic-conventions, opentelemetry-proto, mypy-extensions, jsonpointer, importlib-metadata, humanfriendly, httptools, h11, dnspython, deprecated, chroma-hnswlib, bcrypt, backoff, asgiref, watchfiles, uvicorn, typing-inspect, tiktoken, starlette, scikit-learn, posthog, opentelemetry-exporter-otlp-proto-common, opentelemetry-api, marshmallow, langchainhub, jsonpatch, httpcore, email_validator, coloredlogs, typer, pynndescent, opentelemetry-sdk, opentelemetry-instrumentation, onnxruntime, langsmith, kubernetes, httpx, dataclasses-json, umap-learn, opentelemetry-instrumentation-asgi, opentelemetry-exporter-otlp-proto-grpc, openai, langchain-core, anthropic, opentelemetry-instrumentation-fastapi, langchain-text-splitters, langchain-openai, langchain_community, langchain-anthropic, langchain, fastapi-cli, fastapi, chromadb\n",
272
            "  Attempting uninstall: packaging\n",
273
            "    Found existing installation: packaging 24.0\n",
274
            "    Uninstalling packaging-24.0:\n",
275
            "      Successfully uninstalled packaging-24.0\n",
276
            "  Attempting uninstall: importlib-metadata\n",
277
            "    Found existing installation: importlib_metadata 7.1.0\n",
278
            "    Uninstalling importlib_metadata-7.1.0:\n",
279
            "      Successfully uninstalled importlib_metadata-7.1.0\n",
280
            "  Attempting uninstall: scikit-learn\n",
281
            "    Found existing installation: scikit-learn 1.2.2\n",
282
            "    Uninstalling scikit-learn-1.2.2:\n",
283
            "      Successfully uninstalled scikit-learn-1.2.2\n",
284
            "  Attempting uninstall: typer\n",
285
            "    Found existing installation: typer 0.9.4\n",
286
            "    Uninstalling typer-0.9.4:\n",
287
            "      Successfully uninstalled typer-0.9.4\n",
288
            "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
289
            "spacy 3.7.4 requires typer<0.10.0,>=0.3.0, but you have typer 0.12.3 which is incompatible.\n",
290
            "weasel 0.3.4 requires typer<0.10.0,>=0.3.0, but you have typer 0.12.3 which is incompatible.\u001b[0m\u001b[31m\n",
291
            "\u001b[0mSuccessfully installed anthropic-0.25.9 asgiref-3.8.1 backoff-2.2.1 bcrypt-4.1.3 chroma-hnswlib-0.7.3 chromadb-0.5.0 coloredlogs-15.0.1 dataclasses-json-0.6.6 deprecated-1.2.14 dnspython-2.6.1 email_validator-2.1.1 fastapi-0.111.0 fastapi-cli-0.0.3 h11-0.14.0 httpcore-1.0.5 httptools-0.6.1 httpx-0.27.0 humanfriendly-10.0 importlib-metadata-7.0.0 jsonpatch-1.33 jsonpointer-2.4 kubernetes-29.0.0 langchain-0.1.20 langchain-anthropic-0.1.12 langchain-core-0.1.52 langchain-openai-0.1.7 langchain-text-splitters-0.0.1 langchain_community-0.0.38 langchainhub-0.1.15 langsmith-0.1.58 marshmallow-3.21.2 mmh3-4.1.0 monotonic-1.6 mypy-extensions-1.0.0 onnxruntime-1.17.3 openai-1.30.1 opentelemetry-api-1.24.0 opentelemetry-exporter-otlp-proto-common-1.24.0 opentelemetry-exporter-otlp-proto-grpc-1.24.0 opentelemetry-instrumentation-0.45b0 opentelemetry-instrumentation-asgi-0.45b0 opentelemetry-instrumentation-fastapi-0.45b0 opentelemetry-proto-1.24.0 opentelemetry-sdk-1.24.0 opentelemetry-semantic-conventions-0.45b0 opentelemetry-util-http-0.45b0 orjson-3.10.3 overrides-7.7.0 packaging-23.2 posthog-3.5.0 pynndescent-0.5.12 pypika-0.48.9 python-dotenv-1.0.1 python-multipart-0.0.9 scikit-learn-1.4.2 shellingham-1.5.4 starlette-0.37.2 tiktoken-0.7.0 typer-0.12.3 types-requests-2.31.0.20240406 typing-inspect-0.9.0 ujson-5.10.0 umap-learn-0.5.6 uvicorn-0.29.0 uvloop-0.19.0 watchfiles-0.21.0 websockets-12.0\n"
292
          ]
293
        }
294
      ],
295
      "source": [
296
        "pip install -U langchain umap-learn scikit-learn langchain_community tiktoken langchain-openai langchainhub chromadb langchain-anthropic"
297
      ]
298
    },
299
    {
300
      "cell_type": "markdown",
301
      "id": "ea54c848-0df6-474e-b266-218a2acf67d3",
302
      "metadata": {
303
        "id": "ea54c848-0df6-474e-b266-218a2acf67d3"
304
      },
305
      "source": [
306
        "# RAPTOR: Recursive Abstractive Processing for Tree-Organized Retrieval\n",
307
        "\n",
308
        "The [RAPTOR](https://arxiv.org/pdf/2401.18059.pdf) paper presents an interesting approaching for indexing and retrieval of documents:\n",
309
        "\n",
310
        "* The `leafs` are a set of starting documents\n",
311
        "* Leafs are embedded and clustered\n",
312
        "* Clusters are then summarized into higher level (more abstract) consolidations of information across similar documents\n",
313
        "\n",
314
        "This process is done recursivly, resulting in a \"tree\" going from raw docs (`leafs`) to more abstract summaries.\n",
315
        "\n",
316
        "We can applying this at varying scales; `leafs` can be:\n",
317
        "\n",
318
        "* Text chunks from a single doc (as shown in the paper)\n",
319
        "* Full docs (as we show below)\n",
320
        "\n",
321
        "With longer context LLMs, it's possible to perform this over full documents.\n",
322
        "\n",
323
        "![Screenshot 2024-03-04 at 12.45.25 PM.png](attachment:72039e0c-e8c4-4b17-8780-04ad9fc584f3.png)"
324
      ]
325
    },
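    {
      "cell_type": "markdown",
      "id": "raptor-loop-sketch-md",
      "metadata": {},
      "source": [
        "A minimal sketch of the embed -> cluster -> summarize loop described above, assuming placeholder helpers `embed_texts`, `cluster_embeddings` (e.g. a GMM over UMAP-reduced vectors), and `summarize_cluster` (an LLM call); the notebook's full implementation follows later."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "raptor-loop-sketch-code",
      "metadata": {},
      "outputs": [],
      "source": [
        "from typing import Callable, Dict, List\n",
        "\n",
        "def build_raptor_tree(\n",
        "    leaf_texts: List[str],\n",
        "    embed_texts: Callable[[List[str]], List[List[float]]],\n",
        "    cluster_embeddings: Callable[[List[List[float]]], List[int]],\n",
        "    summarize_cluster: Callable[[List[str]], str],\n",
        "    n_levels: int = 3,\n",
        ") -> Dict[int, List[str]]:\n",
        "    \"\"\"Sketch of RAPTOR: embed, cluster, summarize, then recurse on the summaries.\"\"\"\n",
        "    tree = {0: leaf_texts}  # level 0 holds the raw leaf docs\n",
        "    texts = leaf_texts\n",
        "    for level in range(1, n_levels + 1):\n",
        "        if len(texts) <= 1:\n",
        "            break  # nothing left to consolidate\n",
        "        labels = cluster_embeddings(embed_texts(texts))\n",
        "        clusters: Dict[int, List[str]] = {}\n",
        "        for text, label in zip(texts, labels):\n",
        "            clusters.setdefault(label, []).append(text)\n",
        "        # Each cluster is collapsed into one higher-level (more abstract) summary\n",
        "        summaries = [summarize_cluster(members) for members in clusters.values()]\n",
        "        tree[level] = summaries\n",
        "        texts = summaries  # recurse: summaries become the next level's inputs\n",
        "    return tree"
      ]
    },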
326
    {
327
      "cell_type": "code",
328
      "source": [
329
        "import os\n",
330
        "# Optional, add tracing in LangSmith\n",
331
        "os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
332
        "os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'\n",
333
        "os.environ[\"LANGCHAIN_PROJECT\"] = \"RAPTOR\"\n",
334
        "os.environ['LANGCHAIN_API_KEY'] = ''"
335
      ],
336
      "metadata": {
337
        "id": "lk-aKD_W1kwq"
338
      },
339
      "id": "lk-aKD_W1kwq",
340
      "execution_count": 3,
341
      "outputs": []
342
    },
343
    {
344
      "cell_type": "markdown",
345
      "id": "083dd961-b401-4fc6-867c-8f8950059b02",
346
      "metadata": {
347
        "id": "083dd961-b401-4fc6-867c-8f8950059b02"
348
      },
349
      "source": [
350
        "### Docs\n",
351
        "\n",
352
        "Let's apply this to LangChain's LCEL documentation.\n",
353
        "\n",
354
        "In this case, each `doc` is a unique web page of the LCEL docs.\n",
355
        "\n",
356
        "The context varies from < 2k tokens on up to > 10k tokens."
357
      ]
358
    },
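    {
      "cell_type": "markdown",
      "id": "lcel-docs-sketch-md",
      "metadata": {},
      "source": [
        "A minimal, hedged sketch of one way to load each LCEL page as a single doc and check its token count; the docs URL, `RecursiveUrlLoader`, the `BeautifulSoup` extractor, and the `cl100k_base` encoding are assumptions, and the notebook's own loading cell comes next."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "lcel-docs-sketch-code",
      "metadata": {},
      "outputs": [],
      "source": [
        "import matplotlib.pyplot as plt\n",
        "import tiktoken\n",
        "from bs4 import BeautifulSoup\n",
        "from langchain_community.document_loaders import RecursiveUrlLoader\n",
        "\n",
        "# Crawl the LCEL docs (URL and depth are assumptions), one Document per page\n",
        "loader = RecursiveUrlLoader(\n",
        "    url=\"https://python.langchain.com/docs/expression_language/\",\n",
        "    max_depth=2,\n",
        "    extractor=lambda html: BeautifulSoup(html, \"html.parser\").text,\n",
        ")\n",
        "docs = loader.load()\n",
        "\n",
        "# Count tokens per doc to see the under-2k to over-10k spread mentioned above\n",
        "enc = tiktoken.get_encoding(\"cl100k_base\")\n",
        "token_counts = [len(enc.encode(d.page_content)) for d in docs]\n",
        "\n",
        "plt.hist(token_counts, bins=30)\n",
        "plt.xlabel(\"tokens per doc\")\n",
        "plt.ylabel(\"num docs\")\n",
        "plt.show()"
      ]
    },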
359
    {
360
      "cell_type": "code",
361
      "execution_count": 4,
362
      "id": "b17c1331-373f-491d-8b53-ccf634e68c8e",
363
      "metadata": {
364
        "colab": {
365
          "base_uri": "https://localhost:8080/",
366
          "height": 733
367
        },
368
        "id": "b17c1331-373f-491d-8b53-ccf634e68c8e",
369
        "outputId": "ddb2b0ae-58f0-4e3e-b680-cda89f0f2c30"
370
      },
371
      "outputs": [
372
        {
373
          "output_type": "execute_result",
374
          "data": {
375
            "text/plain": [
376
              "<function matplotlib.pyplot.show(close=None, block=None)>"
377
            ],
378
            "text/html": [
379
              "<div style=\"max-width:800px; border: 1px solid var(--colab-border-color);\"><style>\n",
380
              "      pre.function-repr-contents {\n",
381
              "        overflow-x: auto;\n",
382
              "        padding: 8px 12px;\n",
383
              "        max-height: 500px;\n",
384
              "      }\n",
385
              "\n",
386
              "      pre.function-repr-contents.function-repr-contents-collapsed {\n",
387
              "        cursor: pointer;\n",
388
              "        max-height: 100px;\n",
389
              "      }\n",
390
              "    </style>\n",
391
              "    <pre style=\"white-space: initial; background:\n",
392
              "         var(--colab-secondary-surface-color); padding: 8px 12px;\n",
393
              "         border-bottom: 1px solid var(--colab-border-color);\"><b>matplotlib.pyplot.show</b><br/>def show(*args, **kwargs)</pre><pre class=\"function-repr-contents function-repr-contents-collapsed\" style=\"\"><a class=\"filepath\" style=\"display:none\" href=\"#\">/usr/local/lib/python3.10/dist-packages/matplotlib/pyplot.py</a>Display all open figures.\n",
394
              "\n",
395
              "Parameters\n",
396
              "----------\n",
397
              "block : bool, optional\n",
398
              "    Whether to wait for all figures to be closed before returning.\n",
399
              "\n",
400
              "    If `True` block and run the GUI main loop until all figure windows\n",
401
              "    are closed.\n",
402
              "\n",
403
              "    If `False` ensure that all figure windows are displayed and return\n",
404
              "    immediately.  In this case, you are responsible for ensuring\n",
405
              "    that the event loop is running to have responsive figures.\n",
406
              "\n",
407
              "    Defaults to True in non-interactive mode and to False in interactive\n",
408
              "    mode (see `.pyplot.isinteractive`).\n",
409
              "\n",
410
              "See Also\n",
411
              "--------\n",
412
              "ion : Enable interactive mode, which shows / updates the figure after\n",
413
              "      every plotting command, so that calling ``show()`` is not necessary.\n",
414
              "ioff : Disable interactive mode.\n",
415
              "savefig : Save the figure to an image file instead of showing it on screen.\n",
416
              "\n",
417
              "Notes\n",
418
              "-----\n",
419
              "**Saving figures to file and showing a window at the same time**\n",
420
              "\n",
421
              "If you want an image file as well as a user interface window, use\n",
422
              "`.pyplot.savefig` before `.pyplot.show`. At the end of (a blocking)\n",
423
              "``show()`` the figure is closed and thus unregistered from pyplot. Calling\n",
424
              "`.pyplot.savefig` afterwards would save a new and thus empty figure. This\n",
425
              "limitation of command order does not apply if the show is non-blocking or\n",
426
              "if you keep a reference to the figure and use `.Figure.savefig`.\n",
427
              "\n",
428
              "**Auto-show in jupyter notebooks**\n",
429
              "\n",
430
              "The jupyter backends (activated via ``%matplotlib inline``,\n",
431
              "``%matplotlib notebook``, or ``%matplotlib widget``), call ``show()`` at\n",
432
              "the end of every cell by default. Thus, you usually don&#x27;t have to call it\n",
433
              "explicitly there.</pre>\n",
434
              "      <script>\n",
435
              "      if (google.colab.kernel.accessAllowed && google.colab.files && google.colab.files.view) {\n",
436
              "        for (const element of document.querySelectorAll('.filepath')) {\n",
437
              "          element.style.display = 'block'\n",
438
              "          element.onclick = (event) => {\n",
439
              "            event.preventDefault();\n",
440
              "            event.stopPropagation();\n",
441
              "            google.colab.files.view(element.textContent, 401);\n",
442
              "          };\n",
443
              "        }\n",
444
              "      }\n",
445
              "      for (const element of document.querySelectorAll('.function-repr-contents')) {\n",
446
              "        element.onclick = (event) => {\n",
447
              "          event.preventDefault();\n",
448
              "          event.stopPropagation();\n",
449
              "          element.classList.toggle('function-repr-contents-collapsed');\n",
450
              "        };\n",
451
              "      }\n",
452
              "      </script>\n",
453
              "      </div>"
454
            ]
455
          },
456
          "metadata": {},
457
          "execution_count": 4
458
        },
459
        {
460
          "output_type": "display_data",
461
          "data": {
462
            "text/plain": [
463
              "<Figure size 1000x600 with 1 Axes>"
464
            ],
465
            "image/png": "iVBORw0KGgoAAAANSUhEUgAAA1cAAAIjCAYAAADvBuGTAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAABR9ElEQVR4nO3deXyM5/7/8fdMRjYkQUiktlhKrSlKU1pUKpYqetqii6WqGy0nqi09RcppdLG1tOq0hHMUR09Lf6WK2KpSGqSKWhu0lcQakYgwmfv3h4f5diQhiTsm4fV8POZxzHV/7uu+rnG35373vucai2EYhgAAAAAA18Xq7gEAAAAAwM2AcAUAAAAAJiBcAQAAAIAJCFcAAAAAYALCFQAAAACYgHAFAAAAACYgXAEAAACACQhXAAAAAGACwhUAAAAAmIBwBQAlVK1atTRgwAB3D+Om995776l27dry8PBQWFhYsR5r3bp1slgs+uKLL4r1OAAA9yBcAcANEBsbK4vFooSEhDy3t2/fXo0bN77u4yxfvlzjxo277n5uFStXrtSrr76qNm3aaM6cOXr77bdz1VwORAV5lUbnz5/XlClT1Lp1a/n7+8vb21u33367hg4dqn379rl7eJKkTZs2ady4cUpLS3P3UADgqmzuHgAAIG979+6V1Vq4/wa2fPlyzZgxg4BVQGvWrJHVatVnn30mT0/PPGvuuOMO/fvf/3ZpGzVqlMqVK6c33njjRgyz2Jw4cUKdO3fW1q1b9eCDD+rxxx9XuXLltHfvXi1cuFCzZs3ShQsX3D1Mbdq0SdHR0RowYIACAgLcPRwAyBfhCgBKKC8vL3cPodAyMzNVtmxZdw+jwI4dOyYfH598g5UkBQUF6cknn3RpmzhxogIDA3O1lzYDBgzQ9u3b9cUXX+hvf/uby7bx48eX+vAIADcajwUCQAl15XeuLl68qOjoaNWrV0/e3t6qVKmS2rZtq1WrVkm6dKE8Y8YMScrzUbXMzEyNGDFC1atXl5eXl+rXr6/3339fhmG4HDcrK0svv/yyAgMDVb58eT300EP6888/ZbFYXO6IjRs3ThaLRbt379bjjz+uChUqqG3btpKkHTt2aMCAAapdu7a8vb0VHBysp59+WidPnnQ51uU+9u3bpyeffFL+/v6qXLmy3nzzTRmGod9//109evSQn5+fgoODNWnSpAJ9dna7XePHj1edOnXk5eWlWrVqafTo0crOznbWWCwWzZkzR5mZmc7PKjY2tkD95+W3337To48+qooVK8rX11d33323li1bds39srOz9eCDD8rf31+bNm2SJDkcDk2dOlWNGjWSt7e3goKC9Nxzz+n06dMu+9aqVUsPPvigNm7cqFatWsnb21u1a9fWvHnzrnnczZs3a9myZRo0aFCuYCVdCvfvv/++S9uaNWt07733qmzZsgoICFCPHj3066+/utQMGDBAtWrVytXf5b/rv7JYLBo6dKiWLFmixo0by8vLS40aNdKKFStc9hs5cqQkKTQ01Pl3dejQIUnSqlWr1LZtWwUEBKhcuXKqX7++Ro8efc35A0Bx4M4VANxAZ86c0YkTJ3K1X7x48Zr7jhs3TjExMXrmmWfUqlUrpaenKyEhQdu2bdMDDzyg5557TkePHtWqVatyPcZmGIYeeughrV27VoMGDVJYWJi+++47jRw5Un/++aemTJnirB0wYID++9//6qmnntLdd9+t9evXq1u3bvmO69FHH1W9evX09ttvO4PaqlWr9Ntvv2ngwIEKDg7Wrl27NGvWLO3atUs//vhjrovs3r1764477tDEiRO1bNkyTZgwQRUrVtQnn3yi+++/X++8847mz5+vV155RXfddZfuu+++q35WzzzzjObOnatHHnlEI0aM0ObNmxUTE6Nff/1VX331lSTp3//+t2bNmqUtW7bo008/lSTdc8891/x7yEtqaqruuecenTt3Ti+//LIqVaqkuXPn6qGHHtIXX3yhXr165blfVlaWevTooYSEBK1evVp33XWXJOm5555TbGysBg4cqJdffllJSUmaPn26tm/frh9++EFlypRx9nHgwAE98sgjGjRokPr376/Zs2drwIABatGihRo1apTvmL/++mtJ0lNPPVWgOa5evVpdunRR7dq1NW7cOGVlZenDDz9UmzZttG3btjwDVUFs3LhRX375pV588UWVL19eH3zwgf72t7/pyJEjqlSpkh5++GHt27dPCxYs0JQpUxQYGChJqly5snbt2qUHH3xQTZs21VtvvSUvLy8dOHBAP/zwQ5HGAgDXzQAAFLs5c+YYkq76atSokcs+NWvWNPr37+9836xZM6Nbt25XPc6QIUOMvP7VvmTJEkOSMWHCBJf2Rx55xLBYLMaBAwcMwzCMrVu3GpKM4cOHu9QNGDDAkGSMHTvW2TZ27FhDktG3b99cxzt37lyutgULFhiSjA0bNuTq49lnn3W22e12o1q1aobFYjEmTpzobD99+rTh4+Pj8pnkJTEx0ZBkPPPMMy7tr7zyiiHJWLNmjbOtf//+RtmyZa/aX14aNWpktGvXzvl++PDhhiTj+++/d7adPXvWCA0NNWrVqmXk5OQYhmEYa9euNSQZixcvNs6ePWu0a9fOCAwMNLZv3+7c7/vvvzckGfPnz3c55ooVK3K116xZM9dneuzYMcPLy8sYMWLEVefQq1cvQ5Jx+vTpAs05LCzMqFKlinHy5Eln288//2xYrVajX79+zrb+/fsbNWvWzLX/5b/rv5JkeHp6Os+/y31KMj788ENn23vvvWdIMpKSklz2nzJliiHJOH78eIHmAADFjccCAeAGmjFjhlatWpXr1bRp02vuGxAQoF27dmn//v2FPu7y5cvl4eGhl19+2aV9xIgRMgxD3377rSQ5H8d68cUXXepeeumlfPt+/vnnc7X5+Pg4/3z+/HmdOHFCd999tyRp27ZtueqfeeYZ5589PDzUsmVLGYahQYMGOdsDAgJUv359/fbbb/mORbo0V0mKiopyaR8xYoQkFehRvcJavny5WrVq5XwsUpLKlSunZ599VocOHdLu3btd6s+cOaNOnTppz549WrduncsS8IsXL5a/v78eeOABnThxwvlq0aKFypUrp7Vr17r01bBhQ917773O95UrVy7Q55Seni5JKl++/DXnl5ycrMTERA0YMEAVK1Z0tjdt2lQPPPCA8zMvioiICNWpU8elTz8/v2uOX5JzcYulS5fK4XAUeQwAYBbCFQDcQK1atVJERESuV4UKFa6571tvvaW0tDTdfvvtatKkiUaOHKkdO3YU6LiHDx9WSEhIrgvpO+64w7n98v9arVaFhoa61NWtWzffvq+slaRTp05p2LBhCgoKko+PjypXruysO3PmTK76GjVquLy/vCT45UfA/tp+5feOrnR5DleOOTg4WAEBAc65munw4cOqX79+rvYrP9/Lhg8frp9++kmrV6/O9eje/v37debMGVWpUkWVK1d2eWVkZOjYsWMu9Vd+dpJUoUKFa35Ofn5+kqSzZ
88WaH6S8p3jiRMnlJmZec1+8lLU8UuXHidt06aNnnnmGQUFBalPnz7673//S9AC4DZ85woASon77rtPBw8e1NKlS7Vy5Up9+umnmjJlimbOnOly5+dG++tdqssee+wxbdq0SSNHjlRYWJjKlSsnh8Ohzp0753nh6+HhUaA2SbkW4MhPSf7dqR49emjhwoWaOHGi5s2b57LkvsPhUJUqVTR//vw8961cubLL+6J+Tg0aNJAk/fLLLy53vq5Xfp97Tk5Onu3X8/fs4+OjDRs2aO3atVq2bJlWrFihRYsW6f7779fKlSvz7RsAigt3rgCgFKlYsaIGDhyoBQsW6Pfff1fTpk1dVvDL78K2Zs2aOnr0aK67FHv27HFuv/y/DodDSUlJLnUHDhwo8BhPnz6tuLg4vf7664qOjlavXr30wAMPqHbt2gXu43pcnsOVj0+mpqYqLS3NOVezj7l3795c7Vd+vpf17NlTs2fP1ueff64hQ4a4bKtTp45OnjypNm3a5HmXs1mzZqaMuXv37pKk//znP9esvTz+/OYYGBjoXIK/QoUKef7Y7/XcMbxaULZarerYsaMmT56s3bt365///KfWrFmT6/FJALgRCFcAUEpcuYx5uXLlVLduXZflxS9f4F55cdu1a1fl5ORo+vTpLu1TpkyRxWJRly5dJEmRkZGSpI8++sil7sMPPyzwOC/fLbjyzsPUqVML3Mf16Nq1a57Hmzx5siRddeXD6znmli1bFB8f72zLzMzUrFmzVKtWLTVs2DDXPv369dMHH3ygmTNn6rXXXnO2P/bYY8rJydH48eNz7WO32/MMLkURHh6uzp0769NPP9WSJUtybb9w4YJeeeUVSVLVqlUVFhamuXPnuhx/586dWrlypfMzly6FwzNnzrg8spqcnOxcpbEo8juvT506lav28vfX/vrPBQDcKDwWCAClRMOGDdW+fXu1aNFCFStWVEJCgr744gsNHTrUWdOiRQtJ0ssvv6zIyEh5eHioT58+6t69uzp06KA33nhDhw4dUrNmzbRy5UotXbpUw4cPdy4o0KJFC/3tb3/T1KlTdfLkSedS7Pv27ZNUsEft/Pz8dN999+ndd9/VxYsXddttt2nlypW57oYVl2bNmql///6aNWuW0tLS1K5dO23ZskVz585Vz5491aFDB9OP+frrr2vBggXq0qWLXn75ZVWsWFFz585VUlKS/ve//7k89vdXQ4cOVXp6ut544w35+/tr9OjRateunZ577jnFxMQoMTFRnTp1UpkyZbR//34tXrxY06ZN0yOPPGLKuOfNm6dOnTrp4YcfVvfu3dWxY0eVLVtW+/fv18KFC5WcnOz8rav33ntPXbp0UXh4uAYNGuRcit3f39/l7mmfPn302muvqVevXnr55Zd17tw5ffzxx7r99tvzXMykIC6f12+88Yb69OmjMmXKqHv37nrrrbe0YcMGdevWTTVr1tSxY8f00UcfqVq1ai6LiwDADePOpQoB4FZxeSn2n376Kc/t7dq1u+ZS7BMmTDBatWplBAQEGD4+PkaDBg2Mf/7zn8aFCxecNXa73XjppZeMypUrGxaLxWXp67Nnzxp///vfjZCQEKNMmTJGvXr1jPfee89wOBwux83MzDSGDBliVKxY0ShXrpzRs2dPY+/evYYkl6XRLy+tndcy2H/88YfRq1cvIyAgwPD39zceffRR4+jRo/ku535lH/ktkZ7X55SXixcvGtHR0UZoaKhRpkwZo3r16saoUaOM8+fPF+g413LlUuyGYRgHDx40HnnkESMgIMDw9vY2WrVqZXzzzTcuNX9div2vXn31VUOSMX36dGfbrFmzjBYtWhg+Pj5G+fLljSZNmhivvvqqcfToUWdNzZo181yev127drnGl59z584Z77//vnHXXXcZ5cqVMzw9PY169eoZL730kssS6YZhGKtXrzbatGlj+Pj4GH5+fkb37t2N3bt35+pz5cqVRuPGjQ1PT0+jfv36xn/+8598l2IfMmRIrv2vPPcNwzDGjx9v3HbbbYbVanUuyx4XF2f06NHDCAkJMTw9PY2QkBCjb9++xr59+wo0dwAwm8UwCvjNYADALSsxMVF33nmn/vOf/+iJJ55w93AAACiR+M4VAMBFVlZWrrapU6fKarXqvvvuc8OIAAAoHfjOFQDAxbvvvqutW7eqQ4cOstls+vbbb/Xtt9/q2WefVfXq1d09PAAASiweCwQAuFi1apWio6O1e/duZWRkqEaNGnrqqaf0xhtvyGbjv8kBAJAfwhUAAAAAmIDvXAEAAACACQhXAAAAAGACHp7Pg8Ph0NGjR1W+fPkC/WAmAAAAgJuTYRg6e/asQkJC8v1R+MsIV3k4evQoK2IBAAAAcPr9999VrVq1q9YQrvJQvnx5SZc+QD8/PzePBgAAAIC7pKenq3r16s6McDWEqzxcfhTQz8+PcAUAAACgQF8XYkELAAAAADAB4QoAAAAATEC4AgAAAAATEK4AAAAAwASEKwAAAAAwAeEKAAAAAExAuAIAAAAAExCuAAAAAMAEhCsAAAAAMAHhCgAAAABMQLgCAAAAABMQrgAAAADABIQrAAAAADAB4QoAAAAATEC4AgAAAAATuDVcxcTE6K677lL58uVVpUoV9ezZU3v37r3mfosXL1aDBg3k7e2tJk2aaPny5S7bDcPQmDFjVLVqVfn4+CgiIkL79+8vrmkAAAAAgHvD1fr16zVkyBD9+OOPWrVqlS5evKhOnTopMzMz3302bdqkvn37atCgQdq+fbt69uypnj17aufOnc6ad999Vx988IFmzpypzZs3q2zZsoqMjNT58+dvxLQAAAAA3IIshmEY7h7EZcePH1eVKlW0fv163XfffXnW9O7dW5mZmfrmm2+cbXfffbfCwsI0c+ZMGYahkJAQjRgxQq+88ook6cyZMwoKClJsbKz69OlzzXGkp6fL399fZ86ckZ+fnzmTAwAAAFDqFCYb2G7QmArkzJkzkqSKFSvmWxMfH6+oqCiXtsjISC1ZskSSlJSUpJSUFEVERDi3+/v7q3Xr1oqPj88zXGVnZys7O9v5Pj09XZJkt9tlt9uLPB8AAADgZnXixAmdPXu22PovX768AgMDi63/gipMHigx4crhcGj48OFq06aNGjdunG9dSkqKgoKCXNqCgoKUkpLi3H65Lb+aK8XExCg6OjpXe0JCgsqWLVuoeQAAAAA3uwsXLmj37n26eNFRbMcoU8aqhg1vl6enZ7EdoyCu9pWlK5WYcDVkyBDt3LlTGzduvOHHHjVqlMvdsPT0dFWvXl0tW7bksUAAAADgCklJSXrttWny8homH59qpveflfWHsrOnaf78+xUaGmp6/4Vx+am2gigR4Wro0KH65ptvtGHDBlWrdvW/nODgYKWmprq0paamKjg42Ln9clvVqlVdasLCwvLs08vLS15eXrnabTabbLYS8REBAAAAJYbVapXdnqNy5WrIy6uO6f3b7VZlZubIarW6/Xq8MMd362qBhmFo6NCh+uqrr7RmzZoCpdLw8HDFxcW5tK1atUrh4eGS
pNDQUAUHB7vUpKena/Pmzc4aAAAAADCbW2PgkCFD9Pnnn2vp0qUqX7688ztR/v7+8vHxkST169dPt912m2JiYiRJw4YNU7t27TRp0iR169ZNCxcuVEJCgmbNmiVJslgsGj58uCZMmKB69eopNDRUb775pkJCQtSzZ0+3zBMAAADAzc+t4erjjz+WJLVv396lfc6cORowYIAk6ciRI7Ja/+8G2z333KPPP/9c//jHPzR69GjVq1dPS5YscVkE49VXX1VmZqaeffZZpaWlqW3btlqxYoW8vb2LfU4AAAAAbk1uDVcF+YmtdevW5Wp79NFH9eijj+a7j8Vi0VtvvaW33nrreoYHAAAAAAXm1u9cAQAAAMDNgnAFAAAAACYgXAEAAACACQhXAAAAAGACwhUAAAAAmIBwBQAAAAAmIFwBAAAAgAkIVwAAAABgAsIVAAAAAJiAcAUAAAAAJiBcAQAAAIAJCFcAAAAAYALCFQAAAACYgHAFAAAAACYgXAEAAACACQhXAAAAAGACwhUAAAAAmIBwBQAAAAAmIFwBAAAAgAkIVwAAAABgAsIVAAAAAJiAcAUAAAAAJiBcAQAAAIAJCFcAAAAAYALCFQAAAACYgHAFAAAAACYgXAEAAACACQhXAAAAAGACwhUAAAAAmIBwBQAAAAAmIFwBAAAAgAkIVwAAAABgAsIVAAAAAJiAcAUAAAAAJiBcAQAAAIAJCFcAAAAAYALCFQAAAACYgHAFAAAAACYgXAEAAACACQhXAAAAAGACwhUAAAAAmIBwBQAAAAAmIFwBAAAAgAkIVwAAAABgAreGqw0bNqh79+4KCQmRxWLRkiVLrlo/YMAAWSyWXK9GjRo5a8aNG5dre4MGDYp5JgAAAABudW4NV5mZmWrWrJlmzJhRoPpp06YpOTnZ+fr9999VsWJFPfrooy51jRo1cqnbuHFjcQwfAAAAAJxs7jx4ly5d1KVLlwLX+/v7y9/f3/l+yZIlOn36tAYOHOhSZ7PZFBwcbNo4AQAAAOBa3Bqurtdnn32miIgI1axZ06V9//79CgkJkbe3t8LDwxUTE6MaNWrk2092drays7Od79PT0yVJdrtddru9eAYPAAAAlFIOh0M2m4dsNoc8PMy/XrbZLvXvcDjcfj1emOOX2nB19OhRffvtt/r8889d2lu3bq3Y2FjVr19fycnJio6O1r333qudO3eqfPnyefYVExOj6OjoXO0JCQkqW7ZssYwfAAAAKK2ysrL0+OORstkOy8PjmOn95+RkyW6P1OHDh3XsmPn9F0ZmZmaBay2GYRjFOJYCs1gs+uqrr9SzZ88C1cfExGjSpEk6evSoPD09861LS0tTzZo1NXnyZA0aNCjPmrzuXFWvXl0nT56Un59foeYBAAAA3OySkpL0xBMjFRDwnnx9Q03v/9y5JKWljdT8+e8pNNT8/gsjPT1dlSpV0pkzZ66ZDUrlnSvDMDR79mw99dRTVw1WkhQQEKDbb79dBw4cyLfGy8tLXl5eudptNptstlL5EQEAAADFxmq1ym7Pkd1uVU6O+dfLdvul/q1Wq9uvxwtz/FL5O1fr16/XgQMH8r0T9VcZGRk6ePCgqlategNGBgAAAOBW5dZwlZGRocTERCUmJkq6dHsxMTFRR44ckSSNGjVK/fr1y7XfZ599ptatW6tx48a5tr3yyitav369Dh06pE2bNqlXr17y8PBQ3759i3UuAAAAAG5tbr3HlpCQoA4dOjjfR0VFSZL69++v2NhYJScnO4PWZWfOnNH//vc/TZs2Lc8+//jjD/Xt21cnT55U5cqV1bZtW/3444+qXLly8U0EAAAAwC3PreGqffv2utp6GrGxsbna/P39de7cuXz3WbhwoRlDAwAAAIBCKZXfuQIAAACAkoZwBQAAAAAmIFwBAAAAgAkIVwAAAABgAsIVAAAAAJiAcAUAAAAAJiBcAQAAAIAJCFcAAAAAYALCFQAAAACYgHAFAAAAACYgXAEAAACACQhXAAAAAGACwhUAAAAAmIBwBQAAAAAmIFwBAAAAgAkIVwAAAABgAsIVAAAAAJiAcAUAAAAAJiBcAQAAAIAJCFcAAAAAYALCFQAAAACYgHAFAAAAACYgXAEAAACACQhXAAAAAGACwhUAAAAAmIBwBQAAAAAmIFwBAAAAgAkIVwAAAABgAsIVAAAAAJiAcAUAAAAAJiBcAQAAAIAJCFcAAAAAYALCFQAAAACYgHAFAAAAACYgXAEAAACACQhXAAAAAGACwhUAAAAAmIBwBQAAAAAmIFwBAAAAgAkIVwAAAABgAsIVAAAAAJiAcAUAAAAAJiBcAQAAAIAJCFcAAAAAYAK3hqsNGzaoe/fuCgkJkcVi0ZIlS65av27dOlksllyvlJQUl7oZM2aoVq1a8vb2VuvWrbVly5ZinAUAAAAAuDlcZWZmqlmzZpoxY0ah9tu7d6+Sk5OdrypVqji3LVq0SFFRURo7dqy2bdumZs2aKTIyUseOHTN7+AAAAADgZHPnwbt06aIuXboUer8qVaooICAgz22TJ0/W4MGDNXDgQEnSzJkztWzZMs2ePVuvv/769QwXAAAAAPLl1nBVVGFhYcrOzlbjxo01btw4tWnTRpJ04cIFbd26VaNGjXLWWq1WRUREKD4+Pt/+srOzlZ2d7Xyfnp4uSbLb7bLb7cU0CwAAAKB0cjgcstk8ZLM55OFh/vWyzXapf4fD4fbr8cIcv1SFq6pVq2rmzJlq2bKlsrOz9emnn6p9+/bavHmzmjdvrhMnTignJ0dBQUEu+wUFBWnPnj359hsTE6Po6Ohc7QkJCSpbtqzp8wAAAABKs6ysLD3+eKRstsPy8DD/6zc5OVmy2yN1+PBht3+9JzMzs8C1pSpc1a9fX/Xr13e+v+eee3Tw4EFNmTJF//73v4vc76hRoxQVFeV8n56erurVq6tly5by8/O7rjEDAAAAN5ukpCSNHj1dAQER8vUNNb3/c+eSlJY2XfPnRyg01Pz+C+PyU20FUarCVV5atWqljRs3SpICAwPl4eGh1NRUl5rU1FQFBwfn24eXl5e8vLxytdtsNtlspf4jAgAAAExltVplt+fIbrcqJ8f862W7/VL/VqvV7dfjhTl+qf+dq8TERFWtWlWS5OnpqRYtWiguLs653eFwKC4uTuHh4e4aIgAAAIBbgFtjYEZGhg4cOOB8n5SUpMTERFWsWFE1atTQqFGj9Oeff2revHmSpKlTpyo0NFSNGjXS+fPn9emnn2rNmjVauXKls4+oqCj1799fLVu2VKtWrTR16lRlZmY6Vw8EAAAAgOLg1nCVkJCgDh06ON9f/t5T//79FRsbq+TkZB05csS5/cKFCxoxYoT+/PNP+fr6qmnTplq9erVLH71799bx48c1ZswYpaSkKCwsTCtWrMi1yAUAAAAAmMliGIbh7kGUNOnp6fL399eZM2dY0AIAAAC4wsGDB/Xoo8MVEDBVZcvWMb3/zMyDSksbrsWLp6pOHfP7L4zCZINS/50rAAAAACgJCFcAAAAAYALCFQAAAACYgHAFAAAAACYgXAE
 ...[base64 PNG data omitted: token-count histogram rendered by this cell]"
466
          },
467
          "metadata": {}
468
        }
469
      ],
470
      "source": [
471
        "import matplotlib.pyplot as plt\n",
472
        "import os\n",
473
        "import tiktoken\n",
474
        "from bs4 import BeautifulSoup as Soup\n",
475
        "from langchain_community.document_loaders.recursive_url_loader import RecursiveUrlLoader\n",
476
        "\n",
477
        "\n",
478
        "def num_tokens_from_string(string: str, encoding_name: str) -> int:\n",
479
        "    \"\"\"Returns the number of tokens in a text string.\"\"\"\n",
480
        "    encoding = tiktoken.get_encoding(encoding_name)\n",
481
        "    num_tokens = len(encoding.encode(string))\n",
482
        "    return num_tokens\n",
483
        "\n",
484
        "\n",
485
        "# LCEL docs\n",
486
        "url = \"https://www.sciencedirect.com/science/article/pii/S135964462400117X\"\n",
487
        "loader = RecursiveUrlLoader(\n",
488
        "    url=url, max_depth=1, extractor=lambda x: Soup(x, \"html.parser\").text\n",
489
        ")\n",
490
        "docs = loader.load()\n",
491
        "\n",
492
        "# LCEL w/ PydanticOutputParser (outside the primary LCEL docs)\n",
493
        "url = \"https://www.chemicalqdevice.com/cancer-drug-discovery-innovation\"\n",
494
        "loader = RecursiveUrlLoader(\n",
495
        "    url=url, max_depth=1, extractor=lambda x: Soup(x, \"html.parser\").text\n",
496
        ")\n",
497
        "docs_pydantic = loader.load()\n",
498
        "\n",
499
        "# LCEL w/ Self Query (outside the primary LCEL docs)\n",
500
        "url = \"https://www.chemicalqdevice.com/cancer-drug-discovery-ai\"\n",
501
        "loader = RecursiveUrlLoader(\n",
502
        "    url=url, max_depth=1, extractor=lambda x: Soup(x, \"html.parser\").text\n",
503
        ")\n",
504
        "docs_sq = loader.load()\n",
505
        "\n",
506
        "# Doc texts\n",
507
        "docs.extend([*docs_pydantic, *docs_sq])\n",
508
        "docs_texts = [d.page_content for d in docs]\n",
509
        "\n",
510
        "# Calculate the number of tokens for each document\n",
511
        "counts = [num_tokens_from_string(d, \"cl100k_base\") for d in docs_texts]\n",
512
        "\n",
513
        "# Plotting the histogram of token counts\n",
514
        "plt.figure(figsize=(10, 6))\n",
515
        "plt.hist(counts, bins=30, color=\"blue\", edgecolor=\"black\", alpha=0.7)\n",
516
        "plt.title(\"Histogram of Token Counts\")\n",
517
        "plt.xlabel(\"Token Count\")\n",
518
        "plt.ylabel(\"Frequency\")\n",
519
        "plt.grid(axis=\"y\", alpha=0.75)\n",
520
        "\n",
521
        "# Display the histogram\n",
522
        "plt.show"
523
      ]
524
    },
525
    {
526
      "cell_type": "code",
527
      "execution_count": 5,
528
      "id": "70750603-ec82-4439-9b32-d22014b5ff2c",
529
      "metadata": {
530
        "colab": {
531
          "base_uri": "https://localhost:8080/",
532
          "height": 0
533
        },
534
        "id": "70750603-ec82-4439-9b32-d22014b5ff2c",
535
        "outputId": "a7cd4135-c15f-4b3e-eb93-fbfe969a591a"
536
      },
537
      "outputs": [
538
        {
539
          "output_type": "stream",
540
          "name": "stdout",
541
          "text": [
542
            "Num tokens in all context: 9511\n"
543
          ]
544
        }
545
      ],
546
      "source": [
547
        "# Doc texts concat\n",
548
        "d_sorted = sorted(docs, key=lambda x: x.metadata[\"source\"])\n",
549
        "d_reversed = list(reversed(d_sorted))\n",
550
        "concatenated_content = \"\\n\\n\\n --- \\n\\n\\n\".join(\n",
551
        "    [doc.page_content for doc in d_reversed]\n",
552
        ")\n",
553
        "print(\n",
554
        "    \"Num tokens in all context: %s\"\n",
555
        "    % num_tokens_from_string(concatenated_content, \"cl100k_base\")\n",
556
        ")"
557
      ]
558
    },
559
    {
560
      "cell_type": "code",
561
      "execution_count": 6,
562
      "id": "25ca3cf2-0f6b-40f9-a2ff-285a8dcb33dc",
563
      "metadata": {
564
        "id": "25ca3cf2-0f6b-40f9-a2ff-285a8dcb33dc"
565
      },
566
      "outputs": [],
567
      "source": [
568
        "# Doc texts split\n",
569
        "from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
570
        "\n",
571
        "chunk_size_tok = 2000\n",
572
        "text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(\n",
573
        "    chunk_size=chunk_size_tok, chunk_overlap=0\n",
574
        ")\n",
575
        "texts_split = text_splitter.split_text(concatenated_content)"
576
      ]
577
    },
578
    {
579
      "cell_type": "markdown",
580
      "id": "797a5469-0942-45a5-adb6-f12e05d76798",
581
      "metadata": {
582
        "id": "797a5469-0942-45a5-adb6-f12e05d76798"
583
      },
584
      "source": [
585
        "## Models\n",
586
        "\n",
587
        "We can test various models, including the new [Claude3](https://www.anthropic.com/news/claude-3-family) family.\n",
588
        "\n",
589
        "Be sure to set the relevant API keys:\n",
590
        "\n",
591
        "* `ANTHROPIC_API_KEY`\n",
592
        "* `OPENAI_API_KEY`"
593
      ]
594
    },
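    {
      "cell_type": "markdown",
      "id": "api-keys-getpass-note",
      "metadata": {},
      "source": [
        "As an alternative to hardcoding the keys in the next cell, they can be read at runtime. A minimal sketch using the standard-library `getpass` module, assuming an interactive session:\n",
        "\n",
        "```python\n",
        "import getpass\n",
        "import os\n",
        "\n",
        "# Prompt for the keys instead of embedding them in the notebook\n",
        "os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API key: \")\n",
        "# os.environ[\"ANTHROPIC_API_KEY\"] = getpass.getpass(\"Anthropic API key: \")\n",
        "```"
      ]
    },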
595
    {
596
      "cell_type": "code",
597
      "execution_count": 7,
598
      "id": "033e71d3-5dc8-42a3-a0b7-4df116048c14",
599
      "metadata": {
600
        "id": "033e71d3-5dc8-42a3-a0b7-4df116048c14"
601
      },
602
      "outputs": [],
603
      "source": [
604
        "os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
605
        "\n",
606
        "from langchain_openai import OpenAIEmbeddings\n",
607
        "\n",
608
        "embd = OpenAIEmbeddings()\n",
609
        "\n",
610
        "from langchain_openai import ChatOpenAI\n",
611
        "\n",
612
        "model = ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo-0125\")\n",
613
        "\n",
614
        "# from langchain_anthropic import ChatAnthropic\n",
615
        "\n",
616
        "# model = ChatAnthropic(temperature=0, model=\"claude-3-opus-20240229\")"
617
      ]
618
    },
619
    {
620
      "cell_type": "markdown",
621
      "id": "5c63db01-cf95-4c17-ae5d-8dc7267ad58a",
622
      "metadata": {
623
        "id": "5c63db01-cf95-4c17-ae5d-8dc7267ad58a"
624
      },
625
      "source": [
626
        "### Tree Constrution\n",
627
        "\n",
628
        "The clustering approach in tree construction includes a few interesting ideas.\n",
629
        "\n",
630
        "**GMM (Gaussian Mixture Model)**\n",
631
        "\n",
632
        "- Model the distribution of data points across different clusters\n",
633
        "- Optimal number of clusters by evaluating the model's Bayesian Information Criterion (BIC)\n",
634
        "\n",
635
        "**UMAP (Uniform Manifold Approximation and Projection)**\n",
636
        "\n",
637
        "- Supports clustering\n",
638
        "- Reduces the dimensionality of high-dimensional data\n",
639
        "- UMAP helps to highlight the natural grouping of data points based on their similarities\n",
640
        "\n",
641
        "**Local and Global Clustering**\n",
642
        "\n",
643
        "- Used to analyze data at different scales\n",
644
        "- Both fine-grained and broader patterns within the data are captured effectively\n",
645
        "\n",
646
        "**Thresholding**\n",
647
        "\n",
648
        "- Apply in the context of GMM to determine cluster membership\n",
649
        "- Based on the probability distribution (assignment of data points to ≥ 1 cluster)\n",
650
        "---\n",
651
        "\n",
652
        "Code for GMM and thresholding is from Sarthi et al, as noted in the below two sources:\n",
653
        "\n",
654
        "* [Origional repo](https://github.com/parthsarthi03/raptor/blob/master/raptor/cluster_tree_builder.py)\n",
655
        "* [Minor tweaks](https://github.com/run-llama/llama_index/blob/main/llama-index-packs/llama-index-packs-raptor/llama_index/packs/raptor/clustering.py)\n",
656
        "\n",
657
        "Full credit to both authors."
658
      ]
659
    },
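    {
      "cell_type": "markdown",
      "id": "gmm-threshold-sketch",
      "metadata": {},
      "source": [
        "To make the thresholding idea concrete before the full implementation below, here is a minimal, self-contained sketch on toy 2-D data (not the notebook's embeddings); the points and the threshold value are illustrative only:\n",
        "\n",
        "```python\n",
        "import numpy as np\n",
        "from sklearn.mixture import GaussianMixture\n",
        "\n",
        "# Two loose groups plus one point roughly between them\n",
        "points = np.array([[0.0, 0.0], [0.1, 0.2], [5.0, 5.0], [5.2, 4.9], [2.6, 2.5]])\n",
        "\n",
        "gm = GaussianMixture(n_components=2, random_state=0).fit(points)\n",
        "probs = gm.predict_proba(points)  # shape: (n_points, n_components)\n",
        "\n",
        "threshold = 0.1\n",
        "# Soft assignment: a point joins every cluster whose probability exceeds the threshold,\n",
        "# so the in-between point may receive more than one cluster label\n",
        "labels = [np.where(p > threshold)[0] for p in probs]\n",
        "print(labels)\n",
        "```"
      ]
    },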
660
    {
661
      "cell_type": "code",
662
      "execution_count": 8,
663
      "id": "a849980c-27d4-48e0-87a0-c2a5143cb8c0",
664
      "metadata": {
665
        "id": "a849980c-27d4-48e0-87a0-c2a5143cb8c0"
666
      },
667
      "outputs": [],
668
      "source": [
669
        "from typing import Dict, List, Optional, Tuple\n",
670
        "\n",
671
        "import numpy as np\n",
672
        "import pandas as pd\n",
673
        "import umap\n",
674
        "from langchain.prompts import ChatPromptTemplate\n",
675
        "from langchain_core.output_parsers import StrOutputParser\n",
676
        "from sklearn.mixture import GaussianMixture\n",
677
        "\n",
678
        "RANDOM_SEED = 224  # Fixed seed for reproducibility\n",
679
        "\n",
680
        "### --- Code from citations referenced above (added comments and docstrings) --- ###\n",
681
        "\n",
682
        "\n",
683
        "def global_cluster_embeddings(\n",
684
        "    embeddings: np.ndarray,\n",
685
        "    dim: int,\n",
686
        "    n_neighbors: Optional[int] = None,\n",
687
        "    metric: str = \"cosine\",\n",
688
        ") -> np.ndarray:\n",
689
        "    \"\"\"\n",
690
        "    Perform global dimensionality reduction on the embeddings using UMAP.\n",
691
        "\n",
692
        "    Parameters:\n",
693
        "    - embeddings: The input embeddings as a numpy array.\n",
694
        "    - dim: The target dimensionality for the reduced space.\n",
695
        "    - n_neighbors: Optional; the number of neighbors to consider for each point.\n",
696
        "                   If not provided, it defaults to the square root of the number of embeddings.\n",
697
        "    - metric: The distance metric to use for UMAP.\n",
698
        "\n",
699
        "    Returns:\n",
700
        "    - A numpy array of the embeddings reduced to the specified dimensionality.\n",
701
        "    \"\"\"\n",
702
        "    if n_neighbors is None:\n",
703
        "        n_neighbors = int((len(embeddings) - 1) ** 0.5)\n",
704
        "    return umap.UMAP(\n",
705
        "        n_neighbors=n_neighbors, n_components=dim, metric=metric\n",
706
        "    ).fit_transform(embeddings)\n",
707
        "\n",
708
        "\n",
709
        "def local_cluster_embeddings(\n",
710
        "    embeddings: np.ndarray, dim: int, num_neighbors: int = 10, metric: str = \"cosine\"\n",
711
        ") -> np.ndarray:\n",
712
        "    \"\"\"\n",
713
        "    Perform local dimensionality reduction on the embeddings using UMAP, typically after global clustering.\n",
714
        "\n",
715
        "    Parameters:\n",
716
        "    - embeddings: The input embeddings as a numpy array.\n",
717
        "    - dim: The target dimensionality for the reduced space.\n",
718
        "    - num_neighbors: The number of neighbors to consider for each point.\n",
719
        "    - metric: The distance metric to use for UMAP.\n",
720
        "\n",
721
        "    Returns:\n",
722
        "    - A numpy array of the embeddings reduced to the specified dimensionality.\n",
723
        "    \"\"\"\n",
724
        "    return umap.UMAP(\n",
725
        "        n_neighbors=num_neighbors, n_components=dim, metric=metric\n",
726
        "    ).fit_transform(embeddings)\n",
727
        "\n",
728
        "\n",
729
        "def get_optimal_clusters(\n",
730
        "    embeddings: np.ndarray, max_clusters: int = 50, random_state: int = RANDOM_SEED\n",
731
        ") -> int:\n",
732
        "    \"\"\"\n",
733
        "    Determine the optimal number of clusters using the Bayesian Information Criterion (BIC) with a Gaussian Mixture Model.\n",
734
        "\n",
735
        "    Parameters:\n",
736
        "    - embeddings: The input embeddings as a numpy array.\n",
737
        "    - max_clusters: The maximum number of clusters to consider.\n",
738
        "    - random_state: Seed for reproducibility.\n",
739
        "\n",
740
        "    Returns:\n",
741
        "    - An integer representing the optimal number of clusters found.\n",
742
        "    \"\"\"\n",
743
        "    max_clusters = min(max_clusters, len(embeddings))\n",
744
        "    n_clusters = np.arange(1, max_clusters)\n",
745
        "    bics = []\n",
746
        "    for n in n_clusters:\n",
747
        "        gm = GaussianMixture(n_components=n, random_state=random_state)\n",
748
        "        gm.fit(embeddings)\n",
749
        "        bics.append(gm.bic(embeddings))\n",
750
        "    return n_clusters[np.argmin(bics)]\n",
751
        "\n",
752
        "\n",
753
        "def GMM_cluster(embeddings: np.ndarray, threshold: float, random_state: int = 0):\n",
754
        "    \"\"\"\n",
755
        "    Cluster embeddings using a Gaussian Mixture Model (GMM) based on a probability threshold.\n",
756
        "\n",
757
        "    Parameters:\n",
758
        "    - embeddings: The input embeddings as a numpy array.\n",
759
        "    - threshold: The probability threshold for assigning an embedding to a cluster.\n",
760
        "    - random_state: Seed for reproducibility.\n",
761
        "\n",
762
        "    Returns:\n",
763
        "    - A tuple containing the cluster labels and the number of clusters determined.\n",
764
        "    \"\"\"\n",
765
        "    n_clusters = get_optimal_clusters(embeddings)\n",
766
        "    gm = GaussianMixture(n_components=n_clusters, random_state=random_state)\n",
767
        "    gm.fit(embeddings)\n",
768
        "    probs = gm.predict_proba(embeddings)\n",
769
        "    labels = [np.where(prob > threshold)[0] for prob in probs]\n",
770
        "    return labels, n_clusters\n",
771
        "\n",
772
        "\n",
773
        "def perform_clustering(\n",
774
        "    embeddings: np.ndarray,\n",
775
        "    dim: int,\n",
776
        "    threshold: float,\n",
777
        ") -> List[np.ndarray]:\n",
778
        "    \"\"\"\n",
779
        "    Perform clustering on the embeddings by first reducing their dimensionality globally, then clustering\n",
780
        "    using a Gaussian Mixture Model, and finally performing local clustering within each global cluster.\n",
781
        "\n",
782
        "    Parameters:\n",
783
        "    - embeddings: The input embeddings as a numpy array.\n",
784
        "    - dim: The target dimensionality for UMAP reduction.\n",
785
        "    - threshold: The probability threshold for assigning an embedding to a cluster in GMM.\n",
786
        "\n",
787
        "    Returns:\n",
788
        "    - A list of numpy arrays, where each array contains the cluster IDs for each embedding.\n",
789
        "    \"\"\"\n",
790
        "    if len(embeddings) <= dim + 1:\n",
791
        "        # Avoid clustering when there's insufficient data\n",
792
        "        return [np.array([0]) for _ in range(len(embeddings))]\n",
793
        "\n",
794
        "    # Global dimensionality reduction\n",
795
        "    reduced_embeddings_global = global_cluster_embeddings(embeddings, dim)\n",
796
        "    # Global clustering\n",
797
        "    global_clusters, n_global_clusters = GMM_cluster(\n",
798
        "        reduced_embeddings_global, threshold\n",
799
        "    )\n",
800
        "\n",
801
        "    all_local_clusters = [np.array([]) for _ in range(len(embeddings))]\n",
802
        "    total_clusters = 0\n",
803
        "\n",
804
        "    # Iterate through each global cluster to perform local clustering\n",
805
        "    for i in range(n_global_clusters):\n",
806
        "        # Extract embeddings belonging to the current global cluster\n",
807
        "        global_cluster_embeddings_ = embeddings[\n",
808
        "            np.array([i in gc for gc in global_clusters])\n",
809
        "        ]\n",
810
        "\n",
811
        "        if len(global_cluster_embeddings_) == 0:\n",
812
        "            continue\n",
813
        "        if len(global_cluster_embeddings_) <= dim + 1:\n",
814
        "            # Handle small clusters with direct assignment\n",
815
        "            local_clusters = [np.array([0]) for _ in global_cluster_embeddings_]\n",
816
        "            n_local_clusters = 1\n",
817
        "        else:\n",
818
        "            # Local dimensionality reduction and clustering\n",
819
        "            reduced_embeddings_local = local_cluster_embeddings(\n",
820
        "                global_cluster_embeddings_, dim\n",
821
        "            )\n",
822
        "            local_clusters, n_local_clusters = GMM_cluster(\n",
823
        "                reduced_embeddings_local, threshold\n",
824
        "            )\n",
825
        "\n",
826
        "        # Assign local cluster IDs, adjusting for total clusters already processed\n",
827
        "        for j in range(n_local_clusters):\n",
828
        "            local_cluster_embeddings_ = global_cluster_embeddings_[\n",
829
        "                np.array([j in lc for lc in local_clusters])\n",
830
        "            ]\n",
831
        "            indices = np.where(\n",
832
        "                (embeddings == local_cluster_embeddings_[:, None]).all(-1)\n",
833
        "            )[1]\n",
834
        "            for idx in indices:\n",
835
        "                all_local_clusters[idx] = np.append(\n",
836
        "                    all_local_clusters[idx], j + total_clusters\n",
837
        "                )\n",
838
        "\n",
839
        "        total_clusters += n_local_clusters\n",
840
        "\n",
841
        "    return all_local_clusters\n",
842
        "\n",
843
        "\n",
844
        "### --- Our code below --- ###\n",
845
        "\n",
846
        "\n",
847
        "def embed(texts):\n",
848
        "    \"\"\"\n",
849
        "    Generate embeddings for a list of text documents.\n",
850
        "\n",
851
        "    This function assumes the existence of an `embd` object with a method `embed_documents`\n",
852
        "    that takes a list of texts and returns their embeddings.\n",
853
        "\n",
854
        "    Parameters:\n",
855
        "    - texts: List[str], a list of text documents to be embedded.\n",
856
        "\n",
857
        "    Returns:\n",
858
        "    - numpy.ndarray: An array of embeddings for the given text documents.\n",
859
        "    \"\"\"\n",
860
        "    text_embeddings = embd.embed_documents(texts)\n",
861
        "    text_embeddings_np = np.array(text_embeddings)\n",
862
        "    return text_embeddings_np\n",
863
        "\n",
864
        "\n",
865
        "def embed_cluster_texts(texts):\n",
866
        "    \"\"\"\n",
867
        "    Embeds a list of texts and clusters them, returning a DataFrame with texts, their embeddings, and cluster labels.\n",
868
        "\n",
869
        "    This function combines embedding generation and clustering into a single step. It assumes the existence\n",
870
        "    of a previously defined `perform_clustering` function that performs clustering on the embeddings.\n",
871
        "\n",
872
        "    Parameters:\n",
873
        "    - texts: List[str], a list of text documents to be processed.\n",
874
        "\n",
875
        "    Returns:\n",
876
        "    - pandas.DataFrame: A DataFrame containing the original texts, their embeddings, and the assigned cluster labels.\n",
877
        "    \"\"\"\n",
878
        "    text_embeddings_np = embed(texts)  # Generate embeddings\n",
879
        "    cluster_labels = perform_clustering(\n",
880
        "        text_embeddings_np, 10, 0.1\n",
881
        "    )  # Perform clustering on the embeddings\n",
882
        "    df = pd.DataFrame()  # Initialize a DataFrame to store the results\n",
883
        "    df[\"text\"] = texts  # Store original texts\n",
884
        "    df[\"embd\"] = list(text_embeddings_np)  # Store embeddings as a list in the DataFrame\n",
885
        "    df[\"cluster\"] = cluster_labels  # Store cluster labels\n",
886
        "    return df\n",
887
        "\n",
888
        "\n",
889
        "def fmt_txt(df: pd.DataFrame) -> str:\n",
890
        "    \"\"\"\n",
891
        "    Formats the text documents in a DataFrame into a single string.\n",
892
        "\n",
893
        "    Parameters:\n",
894
        "    - df: DataFrame containing the 'text' column with text documents to format.\n",
895
        "\n",
896
        "    Returns:\n",
897
        "    - A single string where all text documents are joined by a specific delimiter.\n",
898
        "    \"\"\"\n",
899
        "    unique_txt = df[\"text\"].tolist()\n",
900
        "    return \"--- --- \\n --- --- \".join(unique_txt)\n",
901
        "\n",
902
        "\n",
903
        "def embed_cluster_summarize_texts(\n",
904
        "    texts: List[str], level: int\n",
905
        ") -> Tuple[pd.DataFrame, pd.DataFrame]:\n",
906
        "    \"\"\"\n",
907
        "    Embeds, clusters, and summarizes a list of texts. This function first generates embeddings for the texts,\n",
908
        "    clusters them based on similarity, expands the cluster assignments for easier processing, and then summarizes\n",
909
        "    the content within each cluster.\n",
910
        "\n",
911
        "    Parameters:\n",
912
        "    - texts: A list of text documents to be processed.\n",
913
        "    - level: An integer parameter that could define the depth or detail of processing.\n",
914
        "\n",
915
        "    Returns:\n",
916
        "    - Tuple containing two DataFrames:\n",
917
        "      1. The first DataFrame (`df_clusters`) includes the original texts, their embeddings, and cluster assignments.\n",
918
        "      2. The second DataFrame (`df_summary`) contains summaries for each cluster, the specified level of detail,\n",
919
        "         and the cluster identifiers.\n",
920
        "    \"\"\"\n",
921
        "\n",
922
        "    # Embed and cluster the texts, resulting in a DataFrame with 'text', 'embd', and 'cluster' columns\n",
923
        "    df_clusters = embed_cluster_texts(texts)\n",
924
        "\n",
925
        "    # Prepare to expand the DataFrame for easier manipulation of clusters\n",
926
        "    expanded_list = []\n",
927
        "\n",
928
        "    # Expand DataFrame entries to document-cluster pairings for straightforward processing\n",
929
        "    for index, row in df_clusters.iterrows():\n",
930
        "        for cluster in row[\"cluster\"]:\n",
931
        "            expanded_list.append(\n",
932
        "                {\"text\": row[\"text\"], \"embd\": row[\"embd\"], \"cluster\": cluster}\n",
933
        "            )\n",
934
        "\n",
935
        "    # Create a new DataFrame from the expanded list\n",
936
        "    expanded_df = pd.DataFrame(expanded_list)\n",
937
        "\n",
938
        "    # Retrieve unique cluster identifiers for processing\n",
939
        "    all_clusters = expanded_df[\"cluster\"].unique()\n",
940
        "\n",
941
        "    print(f\"--Generated {len(all_clusters)} clusters--\")\n",
942
        "\n",
943
        "    # Summarization\n",
944
        "    template = \"\"\"Here is a sub-set of Generative AI Drug Discovery doc.\n",
945
        "\n",
946
        "    Generative AI Drug Discovery provides a way to improve Drug Discovery.\n",
947
        "\n",
948
        "    Give a detailed summary of the documentation provided.\n",
949
        "\n",
950
        "    Documentation:\n",
951
        "    {context}\n",
952
        "    \"\"\"\n",
953
        "    prompt = ChatPromptTemplate.from_template(template)\n",
954
        "    chain = prompt | model | StrOutputParser()\n",
955
        "\n",
956
        "    # Format text within each cluster for summarization\n",
957
        "    summaries = []\n",
958
        "    for i in all_clusters:\n",
959
        "        df_cluster = expanded_df[expanded_df[\"cluster\"] == i]\n",
960
        "        formatted_txt = fmt_txt(df_cluster)\n",
961
        "        summaries.append(chain.invoke({\"context\": formatted_txt}))\n",
962
        "\n",
963
        "    # Create a DataFrame to store summaries with their corresponding cluster and level\n",
964
        "    df_summary = pd.DataFrame(\n",
965
        "        {\n",
966
        "            \"summaries\": summaries,\n",
967
        "            \"level\": [level] * len(summaries),\n",
968
        "            \"cluster\": list(all_clusters),\n",
969
        "        }\n",
970
        "    )\n",
971
        "\n",
972
        "    return df_clusters, df_summary\n",
973
        "\n",
974
        "\n",
975
        "def recursive_embed_cluster_summarize(\n",
976
        "    texts: List[str], level: int = 1, n_levels: int = 3\n",
977
        ") -> Dict[int, Tuple[pd.DataFrame, pd.DataFrame]]:\n",
978
        "    \"\"\"\n",
979
        "    Recursively embeds, clusters, and summarizes texts up to a specified level or until\n",
980
        "    the number of unique clusters becomes 1, storing the results at each level.\n",
981
        "\n",
982
        "    Parameters:\n",
983
        "    - texts: List[str], texts to be processed.\n",
984
        "    - level: int, current recursion level (starts at 1).\n",
985
        "    - n_levels: int, maximum depth of recursion.\n",
986
        "\n",
987
        "    Returns:\n",
988
        "    - Dict[int, Tuple[pd.DataFrame, pd.DataFrame]], a dictionary where keys are the recursion\n",
989
        "      levels and values are tuples containing the clusters DataFrame and summaries DataFrame at that level.\n",
990
        "    \"\"\"\n",
991
        "    results = {}  # Dictionary to store results at each level\n",
992
        "\n",
993
        "    # Perform embedding, clustering, and summarization for the current level\n",
994
        "    df_clusters, df_summary = embed_cluster_summarize_texts(texts, level)\n",
995
        "\n",
996
        "    # Store the results of the current level\n",
997
        "    results[level] = (df_clusters, df_summary)\n",
998
        "\n",
999
        "    # Determine if further recursion is possible and meaningful\n",
1000
        "    unique_clusters = df_summary[\"cluster\"].nunique()\n",
1001
        "    if level < n_levels and unique_clusters > 1:\n",
1002
        "        # Use summaries as the input texts for the next level of recursion\n",
1003
        "        new_texts = df_summary[\"summaries\"].tolist()\n",
1004
        "        next_level_results = recursive_embed_cluster_summarize(\n",
1005
        "            new_texts, level + 1, n_levels\n",
1006
        "        )\n",
1007
        "\n",
1008
        "        # Merge the results from the next level into the current results dictionary\n",
1009
        "        results.update(next_level_results)\n",
1010
        "\n",
1011
        "    return results"
1012
      ]
1013
    },
1014
    {
1015
      "cell_type": "code",
1016
      "execution_count": 9,
1017
      "id": "f0d8cd3e-cd49-484d-9617-1b9811cc08b3",
1018
      "metadata": {
1019
        "colab": {
1020
          "base_uri": "https://localhost:8080/",
1021
          "height": 0
1022
        },
1023
        "id": "f0d8cd3e-cd49-484d-9617-1b9811cc08b3",
1024
        "outputId": "37bc1b16-60d6-4f18-8139-20f93e6688fb"
1025
      },
1026
      "outputs": [
1027
        {
1028
          "output_type": "stream",
1029
          "name": "stdout",
1030
          "text": [
1031
            "--Generated 1 clusters--\n"
1032
          ]
1033
        }
1034
      ],
1035
      "source": [
1036
        "# Build tree\n",
1037
        "leaf_texts = docs_texts\n",
1038
        "results = recursive_embed_cluster_summarize(leaf_texts, level=1, n_levels=3)"
1039
      ]
1040
    },
1041
    {
1042
      "cell_type": "markdown",
1043
      "id": "e80d7098-5d16-4fa6-837c-968e5c9f118d",
1044
      "metadata": {
1045
        "id": "e80d7098-5d16-4fa6-837c-968e5c9f118d"
1046
      },
1047
      "source": [
1048
        "The paper reports best performance from `collapsed tree retrieval`.\n",
1049
        "\n",
1050
        "This involves flattening the tree structure into a single layer and then applying a k-nearest neighbors (kNN) search across all nodes simultaneously.\n",
1051
        "\n",
1052
        "We do simply do this below."
1053
      ]
1054
    },
1055
    {
1056
      "cell_type": "code",
1057
      "execution_count": 10,
1058
      "id": "d28ba9e6-9124-41a8-b4fd-55a6ef4ac062",
1059
      "metadata": {
1060
        "id": "d28ba9e6-9124-41a8-b4fd-55a6ef4ac062"
1061
      },
1062
      "outputs": [],
1063
      "source": [
1064
        "from langchain_community.vectorstores import Chroma\n",
1065
        "\n",
1066
        "# Initialize all_texts with leaf_texts\n",
1067
        "all_texts = leaf_texts.copy()\n",
1068
        "\n",
1069
        "# Iterate through the results to extract summaries from each level and add them to all_texts\n",
1070
        "for level in sorted(results.keys()):\n",
1071
        "    # Extract summaries from the current level's DataFrame\n",
1072
        "    summaries = results[level][1][\"summaries\"].tolist()\n",
1073
        "    # Extend all_texts with the summaries from the current level\n",
1074
        "    all_texts.extend(summaries)\n",
1075
        "\n",
1076
        "# Now, use all_texts to build the vectorstore with Chroma\n",
1077
        "vectorstore = Chroma.from_texts(texts=all_texts, embedding=embd)\n",
1078
        "retriever = vectorstore.as_retriever()"
1079
      ]
1080
    },
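    {
      "cell_type": "markdown",
      "id": "retriever-k-note",
      "metadata": {},
      "source": [
        "The retriever above uses Chroma's default settings. To control how many of the flattened nodes the kNN step returns per query, `as_retriever` accepts `search_kwargs`; the value of `k` below is only an example:\n",
        "\n",
        "```python\n",
        "# Retrieve the 5 nearest nodes (leaf chunks and summaries) per query\n",
        "retriever = vectorstore.as_retriever(search_kwargs={\"k\": 5})\n",
        "```"
      ]
    },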
1081
    {
1082
      "cell_type": "markdown",
1083
      "id": "0d497627-44c6-41f7-bb63-1d858d3f188f",
1084
      "metadata": {
1085
        "id": "0d497627-44c6-41f7-bb63-1d858d3f188f"
1086
      },
1087
      "source": [
1088
        "Now we can using our flattened, indexed tree in a RAG chain."
1089
      ]
1090
    },
1091
    {
1092
      "cell_type": "code",
1093
      "execution_count": 11,
1094
      "id": "9d6c894b-b3a3-4a01-b779-3e98ea382ff5",
1095
      "metadata": {
1096
        "colab": {
1097
          "base_uri": "https://localhost:8080/",
1098
          "height": 72
1099
        },
1100
        "id": "9d6c894b-b3a3-4a01-b779-3e98ea382ff5",
1101
        "outputId": "4d45aeb5-4e29-4275-a069-50b72e295218"
1102
      },
1103
      "outputs": [
1104
        {
1105
          "output_type": "execute_result",
1106
          "data": {
1107
            "text/plain": [
1108
              "'Cancer is being addressed in Generative AI Drug Discovery through the incorporation of various methodologies into unique workflows. An example of this is the process initiated by ChemicalQDevice, founded by Kevin Kawchak, which aims to advance Cancer Drug Discovery through innovative AI technologies. The use of Generative AI, such as GPT and BERT, is enhancing the automation of research and report writing processes in this field.'"
1109
            ],
1110
            "application/vnd.google.colaboratory.intrinsic+json": {
1111
              "type": "string"
1112
            }
1113
          },
1114
          "metadata": {},
1115
          "execution_count": 11
1116
        }
1117
      ],
1118
      "source": [
1119
        "from langchain import hub\n",
1120
        "from langchain_core.runnables import RunnablePassthrough\n",
1121
        "\n",
1122
        "# Prompt\n",
1123
        "prompt = hub.pull(\"rlm/rag-prompt\")\n",
1124
        "\n",
1125
        "\n",
1126
        "# Post-processing\n",
1127
        "def format_docs(docs):\n",
1128
        "    return \"\\n\\n\".join(doc.page_content for doc in docs)\n",
1129
        "\n",
1130
        "\n",
1131
        "# Chain\n",
1132
        "rag_chain = (\n",
1133
        "    {\"context\": retriever | format_docs, \"question\": RunnablePassthrough()}\n",
1134
        "    | prompt\n",
1135
        "    | model\n",
1136
        "    | StrOutputParser()\n",
1137
        ")\n",
1138
        "\n",
1139
        "# Question\n",
1140
        "rag_chain.invoke(\"How is cancer being addressed in Generative AI Drug Discovery? Give me a specific example.\")"
1141
      ]
1142
    },
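    {
      "cell_type": "markdown",
      "id": "inspect-retrieval-note",
      "metadata": {},
      "source": [
        "To see which nodes (leaf chunks vs. higher-level summaries) back a given answer, the retriever can also be queried directly. A small sketch, assuming the Runnable `invoke` interface of recent LangChain releases; the question string is only an example:\n",
        "\n",
        "```python\n",
        "docs = retriever.invoke(\"How is cancer being addressed in Generative AI Drug Discovery?\")\n",
        "for d in docs:\n",
        "    print(d.page_content[:200])  # preview each retrieved node\n",
        "```"
      ]
    },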
1143
    {
1144
      "cell_type": "markdown",
1145
      "id": "0c585b37-ad83-4069-8f5d-4a6a3e15128d",
1146
      "metadata": {
1147
        "id": "0c585b37-ad83-4069-8f5d-4a6a3e15128d"
1148
      },
1149
      "source": [
1150
        "Trace:\n",
1151
        "\n",
1152
        "https://smith.langchain.com/public/1dabf475-1675-4494-b16c-928fbf079851/r"
1153
      ]
1154
    }
1155
  ],
1156
  "metadata": {
1157
    "kernelspec": {
1158
      "display_name": "Python 3",
1159
      "name": "python3"
1160
    },
1161
    "language_info": {
1162
      "codemirror_mode": {
1163
        "name": "ipython",
1164
        "version": 3
1165
      },
1166
      "file_extension": ".py",
1167
      "mimetype": "text/x-python",
1168
      "name": "python",
1169
      "nbconvert_exporter": "python",
1170
      "pygments_lexer": "ipython3",
1171
      "version": "3.9.16"
1172
    },
1173
    "colab": {
1174
      "provenance": [],
1175
      "machine_shape": "hm",
1176
      "gpuType": "L4"
1177
    },
1178
    "accelerator": "GPU"
1179
  },
1180
  "nbformat": 4,
1181
  "nbformat_minor": 5
1182
}