--- a +++ b/Code/LangChain/RAPTOR/gpt4o_RAPTOR, kkawchak.ipynb @@ -0,0 +1,1182 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "3058e9ca-07c3-4eef-b98c-bc2f2dbb9cc6", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 0 + }, + "id": "3058e9ca-07c3-4eef-b98c-bc2f2dbb9cc6", + "outputId": "911fd243-c023-4d63-f559-7e60e8fd78cb" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting langchain\n", + " Downloading langchain-0.1.20-py3-none-any.whl (1.0 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.0/1.0 MB\u001b[0m \u001b[31m18.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting umap-learn\n", + " Downloading umap_learn-0.5.6-py3-none-any.whl (85 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m85.7/85.7 kB\u001b[0m \u001b[31m6.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: scikit-learn in /usr/local/lib/python3.10/dist-packages (1.2.2)\n", + "Collecting scikit-learn\n", + " Downloading scikit_learn-1.4.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.1/12.1 MB\u001b[0m \u001b[31m84.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting langchain_community\n", + " Downloading langchain_community-0.0.38-py3-none-any.whl (2.0 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m65.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting tiktoken\n", + " Downloading tiktoken-0.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m55.7 MB/s\u001b[0m eta 
\u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting langchain-openai\n", + " Downloading langchain_openai-0.1.7-py3-none-any.whl (34 kB)\n", + "Collecting langchainhub\n", + " Downloading langchainhub-0.1.15-py3-none-any.whl (4.6 kB)\n", + "Collecting chromadb\n", + " Downloading chromadb-0.5.0-py3-none-any.whl (526 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m526.8/526.8 kB\u001b[0m \u001b[31m47.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting langchain-anthropic\n", + " Downloading langchain_anthropic-0.1.12-py3-none-any.whl (16 kB)\n", + "Requirement already satisfied: PyYAML>=5.3 in /usr/local/lib/python3.10/dist-packages (from langchain) (6.0.1)\n", + "Requirement already satisfied: SQLAlchemy<3,>=1.4 in /usr/local/lib/python3.10/dist-packages (from langchain) (2.0.30)\n", + "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /usr/local/lib/python3.10/dist-packages (from langchain) (3.9.5)\n", + "Requirement already satisfied: async-timeout<5.0.0,>=4.0.0 in /usr/local/lib/python3.10/dist-packages (from langchain) (4.0.3)\n", + "Collecting dataclasses-json<0.7,>=0.5.7 (from langchain)\n", + " Downloading dataclasses_json-0.6.6-py3-none-any.whl (28 kB)\n", + "Collecting langchain-core<0.2.0,>=0.1.52 (from langchain)\n", + " Downloading langchain_core-0.1.52-py3-none-any.whl (302 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m302.9/302.9 kB\u001b[0m \u001b[31m27.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting langchain-text-splitters<0.1,>=0.0.1 (from langchain)\n", + " Downloading langchain_text_splitters-0.0.2-py3-none-any.whl (23 kB)\n", + "Collecting langsmith<0.2.0,>=0.1.17 (from langchain)\n", + " Downloading langsmith-0.1.59-py3-none-any.whl (121 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m121.2/121.2 kB\u001b[0m \u001b[31m13.0 MB/s\u001b[0m eta 
\u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: numpy<2,>=1 in /usr/local/lib/python3.10/dist-packages (from langchain) (1.25.2)\n", + "Requirement already satisfied: pydantic<3,>=1 in /usr/local/lib/python3.10/dist-packages (from langchain) (2.7.1)\n", + "Requirement already satisfied: requests<3,>=2 in /usr/local/lib/python3.10/dist-packages (from langchain) (2.31.0)\n", + "Requirement already satisfied: tenacity<9.0.0,>=8.1.0 in /usr/local/lib/python3.10/dist-packages (from langchain) (8.3.0)\n", + "Requirement already satisfied: scipy>=1.3.1 in /usr/local/lib/python3.10/dist-packages (from umap-learn) (1.11.4)\n", + "Requirement already satisfied: numba>=0.51.2 in /usr/local/lib/python3.10/dist-packages (from umap-learn) (0.58.1)\n", + "Collecting pynndescent>=0.5 (from umap-learn)\n", + " Downloading pynndescent-0.5.12-py3-none-any.whl (56 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.8/56.8 kB\u001b[0m \u001b[31m7.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from umap-learn) (4.66.4)\n", + "Requirement already satisfied: joblib>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn) (1.4.2)\n", + "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn) (3.5.0)\n", + "Requirement already satisfied: regex>=2022.1.18 in /usr/local/lib/python3.10/dist-packages (from tiktoken) (2023.12.25)\n", + "Collecting openai<2.0.0,>=1.24.0 (from langchain-openai)\n", + " Downloading openai-1.30.1-py3-none-any.whl (320 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m320.6/320.6 kB\u001b[0m \u001b[31m29.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting types-requests<3.0.0.0,>=2.31.0.2 (from langchainhub)\n", + " Downloading 
types_requests-2.31.0.20240406-py3-none-any.whl (15 kB)\n", + "Requirement already satisfied: build>=1.0.3 in /usr/local/lib/python3.10/dist-packages (from chromadb) (1.2.1)\n", + "Collecting chroma-hnswlib==0.7.3 (from chromadb)\n", + " Downloading chroma_hnswlib-0.7.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.4 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.4/2.4 MB\u001b[0m \u001b[31m88.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting fastapi>=0.95.2 (from chromadb)\n", + " Downloading fastapi-0.111.0-py3-none-any.whl (91 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m92.0/92.0 kB\u001b[0m \u001b[31m7.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting uvicorn[standard]>=0.18.3 (from chromadb)\n", + " Downloading uvicorn-0.29.0-py3-none-any.whl (60 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m60.8/60.8 kB\u001b[0m \u001b[31m5.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting posthog>=2.4.0 (from chromadb)\n", + " Downloading posthog-3.5.0-py2.py3-none-any.whl (41 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m41.3/41.3 kB\u001b[0m \u001b[31m4.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: typing-extensions>=4.5.0 in /usr/local/lib/python3.10/dist-packages (from chromadb) (4.11.0)\n", + "Collecting onnxruntime>=1.14.1 (from chromadb)\n", + " Downloading onnxruntime-1.17.3-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (6.8 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.8/6.8 MB\u001b[0m \u001b[31m100.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting opentelemetry-api>=1.2.0 (from chromadb)\n", + " Downloading opentelemetry_api-1.24.0-py3-none-any.whl (60 
kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m60.1/60.1 kB\u001b[0m \u001b[31m6.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting opentelemetry-exporter-otlp-proto-grpc>=1.2.0 (from chromadb)\n", + " Downloading opentelemetry_exporter_otlp_proto_grpc-1.24.0-py3-none-any.whl (18 kB)\n", + "Collecting opentelemetry-instrumentation-fastapi>=0.41b0 (from chromadb)\n", + " Downloading opentelemetry_instrumentation_fastapi-0.45b0-py3-none-any.whl (11 kB)\n", + "Collecting opentelemetry-sdk>=1.2.0 (from chromadb)\n", + " Downloading opentelemetry_sdk-1.24.0-py3-none-any.whl (106 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m106.1/106.1 kB\u001b[0m \u001b[31m12.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: tokenizers>=0.13.2 in /usr/local/lib/python3.10/dist-packages (from chromadb) (0.19.1)\n", + "Collecting pypika>=0.48.9 (from chromadb)\n", + " Downloading PyPika-0.48.9.tar.gz (67 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m67.3/67.3 kB\u001b[0m \u001b[31m7.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", + " Preparing metadata (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", + "Collecting overrides>=7.3.1 (from chromadb)\n", + " Downloading overrides-7.7.0-py3-none-any.whl (17 kB)\n", + "Requirement already satisfied: importlib-resources in /usr/local/lib/python3.10/dist-packages (from chromadb) (6.4.0)\n", + "Requirement already satisfied: grpcio>=1.58.0 in /usr/local/lib/python3.10/dist-packages (from chromadb) (1.63.0)\n", + "Collecting bcrypt>=4.0.1 (from chromadb)\n", + " Downloading bcrypt-4.1.3-cp39-abi3-manylinux_2_28_x86_64.whl (283 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m283.7/283.7 kB\u001b[0m \u001b[31m29.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: typer>=0.9.0 in /usr/local/lib/python3.10/dist-packages (from chromadb) (0.9.4)\n", + "Collecting kubernetes>=28.1.0 (from chromadb)\n", + " Downloading kubernetes-29.0.0-py2.py3-none-any.whl (1.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m79.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting mmh3>=4.0.1 (from chromadb)\n", + " Downloading mmh3-4.1.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (67 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m67.6/67.6 kB\u001b[0m \u001b[31m6.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting orjson>=3.9.12 (from chromadb)\n", + " Downloading orjson-3.10.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (142 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m142.5/142.5 kB\u001b[0m \u001b[31m6.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting anthropic<1,>=0.23.0 (from langchain-anthropic)\n", + " Downloading anthropic-0.25.9-py3-none-any.whl (871 kB)\n", + "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m871.1/871.1 kB\u001b[0m \u001b[31m14.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: defusedxml<0.8.0,>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from langchain-anthropic) (0.7.1)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.3.1)\n", + "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (23.2.0)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.4.1)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (6.0.5)\n", + "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.9.4)\n", + "Requirement already satisfied: anyio<5,>=3.5.0 in /usr/local/lib/python3.10/dist-packages (from anthropic<1,>=0.23.0->langchain-anthropic) (3.7.1)\n", + "Requirement already satisfied: distro<2,>=1.7.0 in /usr/lib/python3/dist-packages (from anthropic<1,>=0.23.0->langchain-anthropic) (1.7.0)\n", + "Collecting httpx<1,>=0.23.0 (from anthropic<1,>=0.23.0->langchain-anthropic)\n", + " Downloading httpx-0.27.0-py3-none-any.whl (75 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.6/75.6 kB\u001b[0m \u001b[31m8.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: sniffio in /usr/local/lib/python3.10/dist-packages (from anthropic<1,>=0.23.0->langchain-anthropic) (1.3.1)\n", + "Requirement already satisfied: packaging>=19.1 in /usr/local/lib/python3.10/dist-packages (from build>=1.0.3->chromadb) (24.0)\n", + "Requirement already satisfied: pyproject_hooks in 
/usr/local/lib/python3.10/dist-packages (from build>=1.0.3->chromadb) (1.1.0)\n", + "Requirement already satisfied: tomli>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from build>=1.0.3->chromadb) (2.0.1)\n", + "Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain)\n", + " Downloading marshmallow-3.21.2-py3-none-any.whl (49 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.3/49.3 kB\u001b[0m \u001b[31m3.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain)\n", + " Downloading typing_inspect-0.9.0-py3-none-any.whl (8.8 kB)\n", + "Collecting starlette<0.38.0,>=0.37.2 (from fastapi>=0.95.2->chromadb)\n", + " Downloading starlette-0.37.2-py3-none-any.whl (71 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m71.9/71.9 kB\u001b[0m \u001b[31m8.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting fastapi-cli>=0.0.2 (from fastapi>=0.95.2->chromadb)\n", + " Downloading fastapi_cli-0.0.3-py3-none-any.whl (9.2 kB)\n", + "Requirement already satisfied: jinja2>=2.11.2 in /usr/local/lib/python3.10/dist-packages (from fastapi>=0.95.2->chromadb) (3.1.4)\n", + "Collecting python-multipart>=0.0.7 (from fastapi>=0.95.2->chromadb)\n", + " Downloading python_multipart-0.0.9-py3-none-any.whl (22 kB)\n", + "Collecting ujson!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0,>=4.0.1 (from fastapi>=0.95.2->chromadb)\n", + " Downloading ujson-5.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (53 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.6/53.6 kB\u001b[0m \u001b[31m3.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting email_validator>=2.0.0 (from fastapi>=0.95.2->chromadb)\n", + " Downloading email_validator-2.1.1-py3-none-any.whl (30 kB)\n", + "Requirement already 
satisfied: certifi>=14.05.14 in /usr/local/lib/python3.10/dist-packages (from kubernetes>=28.1.0->chromadb) (2024.2.2)\n", + "Requirement already satisfied: six>=1.9.0 in /usr/local/lib/python3.10/dist-packages (from kubernetes>=28.1.0->chromadb) (1.16.0)\n", + "Requirement already satisfied: python-dateutil>=2.5.3 in /usr/local/lib/python3.10/dist-packages (from kubernetes>=28.1.0->chromadb) (2.8.2)\n", + "Requirement already satisfied: google-auth>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from kubernetes>=28.1.0->chromadb) (2.27.0)\n", + "Requirement already satisfied: websocket-client!=0.40.0,!=0.41.*,!=0.42.*,>=0.32.0 in /usr/local/lib/python3.10/dist-packages (from kubernetes>=28.1.0->chromadb) (1.8.0)\n", + "Requirement already satisfied: requests-oauthlib in /usr/local/lib/python3.10/dist-packages (from kubernetes>=28.1.0->chromadb) (1.3.1)\n", + "Requirement already satisfied: oauthlib>=3.2.2 in /usr/local/lib/python3.10/dist-packages (from kubernetes>=28.1.0->chromadb) (3.2.2)\n", + "Requirement already satisfied: urllib3>=1.24.2 in /usr/local/lib/python3.10/dist-packages (from kubernetes>=28.1.0->chromadb) (2.0.7)\n", + "Collecting jsonpatch<2.0,>=1.33 (from langchain-core<0.2.0,>=0.1.52->langchain)\n", + " Downloading jsonpatch-1.33-py2.py3-none-any.whl (12 kB)\n", + "Collecting packaging>=19.1 (from build>=1.0.3->chromadb)\n", + " Downloading packaging-23.2-py3-none-any.whl (53 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.0/53.0 kB\u001b[0m \u001b[31m5.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: llvmlite<0.42,>=0.41.0dev0 in /usr/local/lib/python3.10/dist-packages (from numba>=0.51.2->umap-learn) (0.41.1)\n", + "Collecting coloredlogs (from onnxruntime>=1.14.1->chromadb)\n", + " Downloading coloredlogs-15.0.1-py2.py3-none-any.whl (46 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m 
\u001b[31m3.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: flatbuffers in /usr/local/lib/python3.10/dist-packages (from onnxruntime>=1.14.1->chromadb) (24.3.25)\n", + "Requirement already satisfied: protobuf in /usr/local/lib/python3.10/dist-packages (from onnxruntime>=1.14.1->chromadb) (3.20.3)\n", + "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from onnxruntime>=1.14.1->chromadb) (1.12)\n", + "Collecting deprecated>=1.2.6 (from opentelemetry-api>=1.2.0->chromadb)\n", + " Downloading Deprecated-1.2.14-py2.py3-none-any.whl (9.6 kB)\n", + "Collecting importlib-metadata<=7.0,>=6.0 (from opentelemetry-api>=1.2.0->chromadb)\n", + " Downloading importlib_metadata-7.0.0-py3-none-any.whl (23 kB)\n", + "Requirement already satisfied: googleapis-common-protos~=1.52 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb) (1.63.0)\n", + "Collecting opentelemetry-exporter-otlp-proto-common==1.24.0 (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb)\n", + " Downloading opentelemetry_exporter_otlp_proto_common-1.24.0-py3-none-any.whl (17 kB)\n", + "Collecting opentelemetry-proto==1.24.0 (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb)\n", + " Downloading opentelemetry_proto-1.24.0-py3-none-any.whl (50 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m50.8/50.8 kB\u001b[0m \u001b[31m4.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting opentelemetry-instrumentation-asgi==0.45b0 (from opentelemetry-instrumentation-fastapi>=0.41b0->chromadb)\n", + " Downloading opentelemetry_instrumentation_asgi-0.45b0-py3-none-any.whl (14 kB)\n", + "Collecting opentelemetry-instrumentation==0.45b0 (from opentelemetry-instrumentation-fastapi>=0.41b0->chromadb)\n", + " Downloading opentelemetry_instrumentation-0.45b0-py3-none-any.whl (28 kB)\n", + "Collecting 
opentelemetry-semantic-conventions==0.45b0 (from opentelemetry-instrumentation-fastapi>=0.41b0->chromadb)\n", + " Downloading opentelemetry_semantic_conventions-0.45b0-py3-none-any.whl (36 kB)\n", + "Collecting opentelemetry-util-http==0.45b0 (from opentelemetry-instrumentation-fastapi>=0.41b0->chromadb)\n", + " Downloading opentelemetry_util_http-0.45b0-py3-none-any.whl (6.9 kB)\n", + "Requirement already satisfied: setuptools>=16.0 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-instrumentation==0.45b0->opentelemetry-instrumentation-fastapi>=0.41b0->chromadb) (67.7.2)\n", + "Requirement already satisfied: wrapt<2.0.0,>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-instrumentation==0.45b0->opentelemetry-instrumentation-fastapi>=0.41b0->chromadb) (1.14.1)\n", + "Collecting asgiref~=3.0 (from opentelemetry-instrumentation-asgi==0.45b0->opentelemetry-instrumentation-fastapi>=0.41b0->chromadb)\n", + " Downloading asgiref-3.8.1-py3-none-any.whl (23 kB)\n", + "Collecting monotonic>=1.5 (from posthog>=2.4.0->chromadb)\n", + " Downloading monotonic-1.6-py2.py3-none-any.whl (8.2 kB)\n", + "Collecting backoff>=1.10.0 (from posthog>=2.4.0->chromadb)\n", + " Downloading backoff-2.2.1-py3-none-any.whl (15 kB)\n", + "Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1->langchain) (0.6.0)\n", + "Requirement already satisfied: pydantic-core==2.18.2 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1->langchain) (2.18.2)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain) (3.7)\n", + "Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.10/dist-packages (from SQLAlchemy<3,>=1.4->langchain) (3.0.3)\n", + "Requirement already 
satisfied: huggingface-hub<1.0,>=0.16.4 in /usr/local/lib/python3.10/dist-packages (from tokenizers>=0.13.2->chromadb) (0.20.3)\n", + "Requirement already satisfied: click<9.0.0,>=7.1.1 in /usr/local/lib/python3.10/dist-packages (from typer>=0.9.0->chromadb) (8.1.7)\n", + "Collecting h11>=0.8 (from uvicorn[standard]>=0.18.3->chromadb)\n", + " Downloading h11-0.14.0-py3-none-any.whl (58 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m9.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting httptools>=0.5.0 (from uvicorn[standard]>=0.18.3->chromadb)\n", + " Downloading httptools-0.6.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (341 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m341.4/341.4 kB\u001b[0m \u001b[31m37.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting python-dotenv>=0.13 (from uvicorn[standard]>=0.18.3->chromadb)\n", + " Downloading python_dotenv-1.0.1-py3-none-any.whl (19 kB)\n", + "Collecting uvloop!=0.15.0,!=0.15.1,>=0.14.0 (from uvicorn[standard]>=0.18.3->chromadb)\n", + " Downloading uvloop-0.19.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.4 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.4/3.4 MB\u001b[0m \u001b[31m103.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting watchfiles>=0.13 (from uvicorn[standard]>=0.18.3->chromadb)\n", + " Downloading watchfiles-0.21.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m74.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting websockets>=10.4 (from uvicorn[standard]>=0.18.3->chromadb)\n", + " Downloading 
websockets-12.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (130 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m130.2/130.2 kB\u001b[0m \u001b[31m14.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio<5,>=3.5.0->anthropic<1,>=0.23.0->langchain-anthropic) (1.2.1)\n", + "Collecting dnspython>=2.0.0 (from email_validator>=2.0.0->fastapi>=0.95.2->chromadb)\n", + " Downloading dnspython-2.6.1-py3-none-any.whl (307 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m307.7/307.7 kB\u001b[0m \u001b[31m35.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting typer>=0.9.0 (from chromadb)\n", + " Downloading typer-0.12.3-py3-none-any.whl (47 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m47.2/47.2 kB\u001b[0m \u001b[31m6.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting shellingham>=1.3.0 (from typer>=0.9.0->chromadb)\n", + " Downloading shellingham-1.5.4-py2.py3-none-any.whl (9.8 kB)\n", + "Requirement already satisfied: rich>=10.11.0 in /usr/local/lib/python3.10/dist-packages (from typer>=0.9.0->chromadb) (13.7.1)\n", + "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from google-auth>=1.0.1->kubernetes>=28.1.0->chromadb) (5.3.3)\n", + "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from google-auth>=1.0.1->kubernetes>=28.1.0->chromadb) (0.4.0)\n", + "Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.10/dist-packages (from google-auth>=1.0.1->kubernetes>=28.1.0->chromadb) (4.9)\n", + "Collecting httpcore==1.* (from httpx<1,>=0.23.0->anthropic<1,>=0.23.0->langchain-anthropic)\n", + " Downloading 
httpcore-1.0.5-py3-none-any.whl (77 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m77.9/77.9 kB\u001b[0m \u001b[31m11.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.16.4->tokenizers>=0.13.2->chromadb) (3.14.0)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.16.4->tokenizers>=0.13.2->chromadb) (2023.6.0)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.10/dist-packages (from importlib-metadata<=7.0,>=6.0->opentelemetry-api>=1.2.0->chromadb) (3.18.1)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2>=2.11.2->fastapi>=0.95.2->chromadb) (2.1.5)\n", + "Collecting jsonpointer>=1.9 (from jsonpatch<2.0,>=1.33->langchain-core<0.2.0,>=0.1.52->langchain)\n", + " Downloading jsonpointer-2.4-py2.py3-none-any.whl (7.8 kB)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich>=10.11.0->typer>=0.9.0->chromadb) (3.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich>=10.11.0->typer>=0.9.0->chromadb) (2.16.1)\n", + "Collecting mypy-extensions>=0.3.0 (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain)\n", + " Downloading mypy_extensions-1.0.0-py3-none-any.whl (4.7 kB)\n", + "Collecting humanfriendly>=9.1 (from coloredlogs->onnxruntime>=1.14.1->chromadb)\n", + " Downloading humanfriendly-10.0-py2.py3-none-any.whl (86 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m12.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from 
sympy->onnxruntime>=1.14.1->chromadb) (1.3.0)\n", + "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich>=10.11.0->typer>=0.9.0->chromadb) (0.1.2)\n", + "Requirement already satisfied: pyasn1<0.7.0,>=0.4.6 in /usr/local/lib/python3.10/dist-packages (from pyasn1-modules>=0.2.1->google-auth>=1.0.1->kubernetes>=28.1.0->chromadb) (0.6.0)\n", + "Building wheels for collected packages: pypika\n", + " Building wheel for pypika (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for pypika: filename=PyPika-0.48.9-py2.py3-none-any.whl size=53724 sha256=0173162f2de8f5b354e35275c3885edf958f5c4a6a295441c8e8ef5557ae7f2e\n", + " Stored in directory: /root/.cache/pip/wheels/e1/26/51/d0bffb3d2fd82256676d7ad3003faea3bd6dddc9577af665f4\n", + "Successfully built pypika\n", + "Installing collected packages: pypika, monotonic, mmh3, websockets, uvloop, ujson, types-requests, shellingham, python-multipart, python-dotenv, packaging, overrides, orjson, opentelemetry-util-http, opentelemetry-semantic-conventions, opentelemetry-proto, mypy-extensions, jsonpointer, importlib-metadata, humanfriendly, httptools, h11, dnspython, deprecated, chroma-hnswlib, bcrypt, backoff, asgiref, watchfiles, uvicorn, typing-inspect, tiktoken, starlette, scikit-learn, posthog, opentelemetry-exporter-otlp-proto-common, opentelemetry-api, marshmallow, langchainhub, jsonpatch, httpcore, email_validator, coloredlogs, typer, pynndescent, opentelemetry-sdk, opentelemetry-instrumentation, onnxruntime, langsmith, kubernetes, httpx, dataclasses-json, umap-learn, opentelemetry-instrumentation-asgi, opentelemetry-exporter-otlp-proto-grpc, openai, langchain-core, anthropic, opentelemetry-instrumentation-fastapi, langchain-text-splitters, langchain-openai, langchain_community, langchain-anthropic, langchain, fastapi-cli, fastapi, chromadb\n", + " Attempting uninstall: packaging\n", + " Found existing installation: packaging 24.0\n", + " 
Uninstalling packaging-24.0:\n", + " Successfully uninstalled packaging-24.0\n", + " Attempting uninstall: importlib-metadata\n", + " Found existing installation: importlib_metadata 7.1.0\n", + " Uninstalling importlib_metadata-7.1.0:\n", + " Successfully uninstalled importlib_metadata-7.1.0\n", + " Attempting uninstall: scikit-learn\n", + " Found existing installation: scikit-learn 1.2.2\n", + " Uninstalling scikit-learn-1.2.2:\n", + " Successfully uninstalled scikit-learn-1.2.2\n", + " Attempting uninstall: typer\n", + " Found existing installation: typer 0.9.4\n", + " Uninstalling typer-0.9.4:\n", + " Successfully uninstalled typer-0.9.4\n", + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", + "spacy 3.7.4 requires typer<0.10.0,>=0.3.0, but you have typer 0.12.3 which is incompatible.\n", + "weasel 0.3.4 requires typer<0.10.0,>=0.3.0, but you have typer 0.12.3 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0mSuccessfully installed anthropic-0.25.9 asgiref-3.8.1 backoff-2.2.1 bcrypt-4.1.3 chroma-hnswlib-0.7.3 chromadb-0.5.0 coloredlogs-15.0.1 dataclasses-json-0.6.6 deprecated-1.2.14 dnspython-2.6.1 email_validator-2.1.1 fastapi-0.111.0 fastapi-cli-0.0.3 h11-0.14.0 httpcore-1.0.5 httptools-0.6.1 httpx-0.27.0 humanfriendly-10.0 importlib-metadata-7.0.0 jsonpatch-1.33 jsonpointer-2.4 kubernetes-29.0.0 langchain-0.1.20 langchain-anthropic-0.1.12 langchain-core-0.1.52 langchain-openai-0.1.7 langchain-text-splitters-0.0.2 langchain_community-0.0.38 langchainhub-0.1.15 langsmith-0.1.59 marshmallow-3.21.2 mmh3-4.1.0 monotonic-1.6 mypy-extensions-1.0.0 onnxruntime-1.17.3 openai-1.30.1 opentelemetry-api-1.24.0 opentelemetry-exporter-otlp-proto-common-1.24.0 opentelemetry-exporter-otlp-proto-grpc-1.24.0 opentelemetry-instrumentation-0.45b0 opentelemetry-instrumentation-asgi-0.45b0 opentelemetry-instrumentation-fastapi-0.45b0 
opentelemetry-proto-1.24.0 opentelemetry-sdk-1.24.0 opentelemetry-semantic-conventions-0.45b0 opentelemetry-util-http-0.45b0 orjson-3.10.3 overrides-7.7.0 packaging-23.2 posthog-3.5.0 pynndescent-0.5.12 pypika-0.48.9 python-dotenv-1.0.1 python-multipart-0.0.9 scikit-learn-1.4.2 shellingham-1.5.4 starlette-0.37.2 tiktoken-0.7.0 typer-0.12.3 types-requests-2.31.0.20240406 typing-inspect-0.9.0 ujson-5.10.0 umap-learn-0.5.6 uvicorn-0.29.0 uvloop-0.19.0 watchfiles-0.21.0 websockets-12.0\n" + ] + } + ], + "source": [ + "pip install -U langchain umap-learn scikit-learn langchain_community tiktoken langchain-openai langchainhub chromadb langchain-anthropic" + ] + }, + { + "cell_type": "markdown", + "id": "ea54c848-0df6-474e-b266-218a2acf67d3", + "metadata": { + "id": "ea54c848-0df6-474e-b266-218a2acf67d3" + }, + "source": [ + "# RAPTOR: Recursive Abstractive Processing for Tree-Organized Retrieval\n", + "\n", + "The [RAPTOR](https://arxiv.org/pdf/2401.18059.pdf) paper presents an interesting approach for indexing and retrieval of documents:\n", + "\n", + "* The `leafs` are a set of starting documents\n", + "* Leafs are embedded and clustered\n", + "* Clusters are then summarized into higher level (more abstract) consolidations of information across similar documents\n", + "\n", + "This process is done recursively, resulting in a \"tree\" going from raw docs (`leafs`) to more abstract summaries.\n", + "\n", + "We can apply this at varying scales; `leafs` can be:\n", + "\n", + "* Text chunks from a single doc (as shown in the paper)\n", + "* Full docs (as we show below)\n", + "\n", + "With longer context LLMs, it's possible to perform this over full documents.\n", + "\n", + "" + ] + }, + { + "cell_type": "code", + "source": [ + "import os\n", + "# Optional, add tracing in LangSmith\n", + "os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n", + "os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'\n", + "os.environ[\"LANGCHAIN_PROJECT\"] = \"RAPTOR\"\n", + 
"os.environ['LANGCHAIN_API_KEY'] = ''" + ], + "metadata": { + "id": "lk-aKD_W1kwq" + }, + "id": "lk-aKD_W1kwq", + "execution_count": 2, + "outputs": [] + }, + { + "cell_type": "markdown", + "id": "083dd961-b401-4fc6-867c-8f8950059b02", + "metadata": { + "id": "083dd961-b401-4fc6-867c-8f8950059b02" + }, + "source": [ + "### Docs\n", + "\n", + "Let's apply this to LangChain's LCEL documentation.\n", + "\n", + "In this case, each `doc` is a unique web page of the LCEL docs.\n", + "\n", + "The context varies from < 2k tokens on up to > 10k tokens." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "b17c1331-373f-491d-8b53-ccf634e68c8e", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 733 + }, + "id": "b17c1331-373f-491d-8b53-ccf634e68c8e", + "outputId": "37db4d71-7316-4525-b04d-53d48a23c5eb" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "<function matplotlib.pyplot.show(close=None, block=None)>" + ], + "text/html": [ + "<div style=\"max-width:800px; border: 1px solid var(--colab-border-color);\"><style>\n", + " pre.function-repr-contents {\n", + " overflow-x: auto;\n", + " padding: 8px 12px;\n", + " max-height: 500px;\n", + " }\n", + "\n", + " pre.function-repr-contents.function-repr-contents-collapsed {\n", + " cursor: pointer;\n", + " max-height: 100px;\n", + " }\n", + " </style>\n", + " <pre style=\"white-space: initial; background:\n", + " var(--colab-secondary-surface-color); padding: 8px 12px;\n", + " border-bottom: 1px solid var(--colab-border-color);\"><b>matplotlib.pyplot.show</b><br/>def show(*args, **kwargs)</pre><pre class=\"function-repr-contents function-repr-contents-collapsed\" style=\"\"><a class=\"filepath\" style=\"display:none\" href=\"#\">/usr/local/lib/python3.10/dist-packages/matplotlib/pyplot.py</a>Display all open figures.\n", + "\n", + "Parameters\n", + "----------\n", + "block : bool, optional\n", + " Whether to wait for all figures to be closed 
before returning.\n", + "\n", + " If `True` block and run the GUI main loop until all figure windows\n", + " are closed.\n", + "\n", + " If `False` ensure that all figure windows are displayed and return\n", + " immediately. In this case, you are responsible for ensuring\n", + " that the event loop is running to have responsive figures.\n", + "\n", + " Defaults to True in non-interactive mode and to False in interactive\n", + " mode (see `.pyplot.isinteractive`).\n", + "\n", + "See Also\n", + "--------\n", + "ion : Enable interactive mode, which shows / updates the figure after\n", + " every plotting command, so that calling ``show()`` is not necessary.\n", + "ioff : Disable interactive mode.\n", + "savefig : Save the figure to an image file instead of showing it on screen.\n", + "\n", + "Notes\n", + "-----\n", + "**Saving figures to file and showing a window at the same time**\n", + "\n", + "If you want an image file as well as a user interface window, use\n", + "`.pyplot.savefig` before `.pyplot.show`. At the end of (a blocking)\n", + "``show()`` the figure is closed and thus unregistered from pyplot. Calling\n", + "`.pyplot.savefig` afterwards would save a new and thus empty figure. This\n", + "limitation of command order does not apply if the show is non-blocking or\n", + "if you keep a reference to the figure and use `.Figure.savefig`.\n", + "\n", + "**Auto-show in jupyter notebooks**\n", + "\n", + "The jupyter backends (activated via ``%matplotlib inline``,\n", + "``%matplotlib notebook``, or ``%matplotlib widget``), call ``show()`` at\n", + "the end of every cell by default. 
Thus, you usually don't have to call it\n", + "explicitly there.</pre>\n", + " <script>\n", + " if (google.colab.kernel.accessAllowed && google.colab.files && google.colab.files.view) {\n", + " for (const element of document.querySelectorAll('.filepath')) {\n", + " element.style.display = 'block'\n", + " element.onclick = (event) => {\n", + " event.preventDefault();\n", + " event.stopPropagation();\n", + " google.colab.files.view(element.textContent, 401);\n", + " };\n", + " }\n", + " }\n", + " for (const element of document.querySelectorAll('.function-repr-contents')) {\n", + " element.onclick = (event) => {\n", + " event.preventDefault();\n", + " event.stopPropagation();\n", + " element.classList.toggle('function-repr-contents-collapsed');\n", + " };\n", + " }\n", + " </script>\n", + " </div>" + ] + }, + "metadata": {}, + "execution_count": 3 + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "<Figure size 1000x600 with 1 Axes>" + ], + "image/png": "iVBORw0KGgoAAAANSUhEUgAAA1cAAAIjCAYAAADvBuGTAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAABR8UlEQVR4nO3deXyM5/7/8fdMRlaSICRSWyyl1hSlKS0qFUsVPW3RxVLVjZYT1ZaeIuU0uthaWnVawjmKo6elv1JFbFUpDVJFrQ3aSmKNSESYzP37w8N8OxIkccckvJ6PxzyOue7Pfd3XNe723O/e91xjMQzDEAAAAADguljdPQAAAAAAuBkQrgAAAADABIQrAAAAADAB4QoAAAAATEC4AgAAAAATEK4AAAAAwASEKwAAAAAwAeEKAAAAAExAuAIAAAAAExCuAKCEqlmzpvr37+/uYdz03nvvPdWqVUseHh4KDw8v1mOtXbtWFotFX3zxRbEeBwDgHoQrALgB4uLiZLFYlJiYmO/2du3aqVGjRtd9nGXLlmns2LHX3c+tYsWKFXr11VfVunVrzZ49W2+//XaemkuBqCCv0ujcuXOaPHmyWrVqpYCAAHl7e+v222/XkCFDtHfvXncPT5K0ceNGjR07Vunp6e4eCgBclc3dAwAA5G/Pnj2yWgv338CWLVum6dOnE7AKaPXq1bJarfrss8/k6emZb80dd9yhf//73y5tI0eOVNmyZfXGG2/ciGEWm+PHj6tTp07asmWLHnzwQT3++OMqW7as9uzZowULFmjmzJk6f/68u4epjRs3KiYmRv3791dgYKC7hwMAV0S4AoASysvLy91DKLSsrCz5+fm5exgFdvToUfn4+FwxWElScHCwnnzySZe2CRMmKCgoKE97adO/f39t27ZNX3zxhf72t7+5bBs3blypD48AcKPxWCAAlFCXf+fqwoULiomJUd26deXt7a2KFSuqTZs2WrlypaSLF8rTp0+XpHwfVcvKytLw4cNVrVo
1eXl5qV69enr//fdlGIbLcbOzs/Xyyy8rKChI5cqV00MPPaQ///xTFovF5Y7Y2LFjZbFYtGvXLj3++OMqX7682rRpI0navn27+vfvr1q1asnb21shISF6+umndeLECZdjXepj7969evLJJxUQEKBKlSrpzTfflGEY+v3339W9e3f5+/srJCREEydOLNBnZ7fbNW7cONWuXVteXl6qWbOmRo0apZycHGeNxWLR7NmzlZWV5fys4uLiCtR/fn777Tc9+uijqlChgnx9fXX33Xdr6dKl19wvJydHDz74oAICArRx40ZJksPh0JQpU9SwYUN5e3srODhYzz33nE6dOuWyb82aNfXggw9qw4YNatmypby9vVWrVi3NnTv3msfdtGmTli5dqoEDB+YJVtLFcP/++++7tK1evVr33nuv/Pz8FBgYqO7du+vXX391qenfv79q1qyZp79Lf9d/ZbFYNGTIEC1evFiNGjWSl5eXGjZsqOXLl7vsN2LECElSWFiY8+/q4MGDkqSVK1eqTZs2CgwMVNmyZVWvXj2NGjXqmvMHgOLAnSsAuIFOnz6t48eP52m/cOHCNfcdO3asYmNj9cwzz6hly5bKyMhQYmKitm7dqgceeEDPPfecjhw5opUrV+Z5jM0wDD300ENas2aNBg4cqPDwcH333XcaMWKE/vzzT02ePNlZ279/f/33v//VU089pbvvvlvr1q1T165drziuRx99VHXr1tXbb7/tDGorV67Ub7/9pgEDBigkJEQ7d+7UzJkztXPnTv344495LrJ79eqlO+64QxMmTNDSpUs1fvx4VahQQZ988onuv/9+vfPOO5o3b55eeeUV3XXXXbrvvvuu+lk988wzmjNnjh555BENHz5cmzZtUmxsrH799Vd99dVXkqR///vfmjlzpjZv3qxPP/1UknTPPfdc8+8hP2lpabrnnnt09uxZvfzyy6pYsaLmzJmjhx56SF988YV69uyZ737Z2dnq3r27EhMTtWrVKt11112SpOeee05xcXEaMGCAXn75ZSUnJ2vatGnatm2bfvjhB5UpU8bZx/79+/XII49o4MCB6tevn2bNmqX+/furefPmatiw4RXH/PXXX0uSnnrqqQLNcdWqVercubNq1aqlsWPHKjs7Wx9++KFat26trVu35huoCmLDhg368ssv9eKLL6pcuXL64IMP9Le//U2HDx9WxYoV9fDDD2vv3r2aP3++Jk+erKCgIElSpUqVtHPnTj344INq0qSJ3nrrLXl5eWn//v364YcfijQWALhuBgCg2M2ePduQdNVXw4YNXfapUaOG0a9fP+f7pk2bGl27dr3qcQYPHmzk96/2xYsXG5KM8ePHu7Q/8sgjhsViMfbv328YhmFs2bLFkGQMGzbMpa5///6GJGPMmDHOtjFjxhiSjD59+uQ53tmzZ/O0zZ8/35BkrF+/Pk8fzz77rLPNbrcbVatWNSwWizFhwgRn+6lTpwwfHx+XzyQ/SUlJhiTjmWeecWl/5ZVXDEnG6tWrnW39+vUz/Pz8rtpffho2bGi0bdvW+X7YsGGGJOP77793tp05c8YICwszatasaeTm5hqGYRhr1qwxJBmLFi0yzpw5Y7Rt29YICgoytm3b5tzv+++/NyQZ8+bNcznm8uXL87TXqFEjz2d69OhRw8vLyxg+fPhV59CzZ09DknHq1KkCzTk8PNyoXLmyceLECWfbzz//bFitVqNv377Otn79+hk1atTIs/+lv+u/kmR4eno6z79LfUoyPvzwQ2fbe++9Z0gykpOTXfafPHmyIck4duxYgeYAAMWNxwIB4AaaPn26Vq5cmefVpEmTa+4bGBionTt3at++fYU+7rJly+Th4aGXX37ZpX348OEyDEPffvutJDkfx3rxxRdd6l566aUr9v3888/nafPx8XH++dy5czp+/LjuvvtuSdLWrVvz1D/zzDPOP3t4eKhFixYyDEMDBw50tgcGBqpevXr67bffrjgW6eJcJSk
6Otqlffjw4ZJUoEf1CmvZsmVq2bKl87FISSpbtqyeffZZHTx4ULt27XKpP336tDp27Kjdu3dr7dq1LkvAL1q0SAEBAXrggQd0/Phx56t58+YqW7as1qxZ49JXgwYNdO+99zrfV6pUqUCfU0ZGhiSpXLly15xfSkqKkpKS1L9/f1WoUMHZ3qRJEz3wwAPOz7woIiMjVbt2bZc+/f39rzl+Sc7FLZYsWSKHw1HkMQCAWQhXAHADtWzZUpGRkXle5cuXv+a+b731ltLT03X77bercePGGjFihLZv316g4x46dEihoaF5LqTvuOMO5/ZL/2u1WhUWFuZSV6dOnSv2fXmtJJ08eVJDhw5VcHCwfHx8VKlSJWfd6dOn89RXr17d5f2lJcEvPQL21/bLv3d0uUtzuHzMISEhCgwMdM7VTIcOHVK9evXytF/++V4ybNgw/fTTT1q1alWeR/f27dun06dPq3LlyqpUqZLLKzMzU0ePHnWpv/yzk6Ty5ctf83Py9/eXJJ05c6ZA85N0xTkeP35cWVlZ1+wnP0Udv3TxcdLWrVvrmWeeUXBwsHr37q3//ve/BC0AbsN3rgCglLjvvvt04MABLVmyRCtWrNCnn36qyZMna8aMGS53fm60v96luuSxxx7Txo0bNWLECIWHh6ts2bJyOBzq1KlTvhe+Hh4eBWqTlGcBjispyb871b17dy1YsEATJkzQ3LlzXZbcdzgcqly5subNm5fvvpUqVXJ5X9TPqX79+pKkX375xeXO1/W60ueem5ubb/v1/D37+Pho/fr1WrNmjZYuXarly5dr4cKFuv/++7VixYor9g0AxYU7VwBQilSoUEEDBgzQ/Pnz9fvvv6tJkyYuK/hd6cK2Ro0aOnLkSJ67FLt373Zuv/S/DodDycnJLnX79+8v8BhPnTql+Ph4vf7664qJiVHPnj31wAMPqFatWgXu43pcmsPlj0+mpaUpPT3dOVezj7lnz5487Zd/vpf06NFDs2bN0ueff67Bgwe7bKtdu7ZOnDih1q1b53uXs2nTpqaMuVu3bpKk//znP9esvTT+K80xKCjIuQR/+fLl8/2x3+u5Y3i1oGy1WtWhQwdNmjRJu3bt0j//+U+tXr06z+OTAHAjEK4AoJS4fBnzsmXLqk6dOi7Li1+6wL384rZLly7Kzc3VtGnTXNonT54si8Wizp07S5KioqIkSR999JFL3YcffljgcV66W3D5nYcpU6YUuI/r0aVLl3yPN2nSJEm66sqH13PMzZs3KyEhwdmWlZWlmTNnqmbNmmrQoEGeffr27asPPvhAM2bM0GuvveZsf+yxx5Sbm6tx48bl2cdut+cbXIoiIiJCnTp10qeffqrFixfn2X7+/Hm98sorkqQqVaooPDxcc+bMcTn+jh07tGLFCudnLl0Mh6dPn3Z5ZDUlJcW5SmNRXOm8PnnyZJ7aS99f++s/FwBwo/BYIACUEg0aNFC7du3UvHlzVahQQYmJifriiy80ZMgQZ03z5s0lSS+//LKioqLk4eGh3r17q1u3bmrfvr3eeOMNHTx4UE2bNtWKFSu0ZMkSDRs2zLmgQPPmzfW3v/1NU6ZM0YkTJ5xLse/du1dSwR618/f313333ad3331XFy5c0G233aYVK1bkuRtWXJo2bap+/fpp5syZSk9PV9u2bbV582bNmTNHPXr0UPv27U0/5uuvv6758+erc+fOevnll1WhQgXNmTNHycnJ+t///ufy2N9fDRkyRBkZGXrjjTcUEBCgUaNGqW3btnruuecUGxurpKQkdezYUWXKlNG+ffu0aNEiTZ06VY888ogp4547d646duyohx9+WN26dVOHDh3k5+enffv2acGCBUpJSXH+1tV7772nzp07KyIiQgMHDnQuxR4QEOBy97R379567bXX1LNnT7388ss6e/asPv74Y91+++35LmZSEJfO6zfeeEO9e/dWmTJl1K1bN7311ltav369unbtqho1aujo0aP66KOPVLV
qVZfFRQDghnHnUoUAcKu4tBT7Tz/9lO/2tm3bXnMp9vHjxxstW7Y0AgMDDR8fH6N+/frGP//5T+P8+fPOGrvdbrz00ktGpUqVDIvF4rL09ZkzZ4y///3vRmhoqFGmTBmjbt26xnvvvWc4HA6X42ZlZRmDBw82KlSoYJQtW9bo0aOHsWfPHkOSy9Lol5bWzm8Z7D/++MPo2bOnERgYaAQEBBiPPvqoceTIkSsu5355H1daIj2/zyk/Fy5cMGJiYoywsDCjTJkyRrVq1YyRI0ca586dK9BxruXypdgNwzAOHDhgPPLII0ZgYKDh7e1ttGzZ0vjmm29cav66FPtfvfrqq4YkY9q0ac62mTNnGs2bNzd8fHyMcuXKGY0bNzZeffVV48iRI86aGjVq5Ls8f9u2bfOM70rOnj1rvP/++8Zdd91llC1b1vD09DTq1q1rvPTSSy5LpBuGYaxatcpo3bq14ePjY/j7+xvdunUzdu3alafPFStWGI0aNTI8PT2NevXqGf/5z3+uuBT74MGD8+x/+blvGIYxbtw447bbbjOsVqtzWfb4+Hije/fuRmhoqOHp6WmEhoYaffr0Mfbu3VuguQOA2SyGUcBvBgMAbllJSUm688479Z///EdPPPGEu4cDAECJxHeuAAAusrOz87RNmTJFVqtV9913nxtGBABA6cB3rgAALt59911t2bJF7du3l81m07fffqtvv/1Wzz77rKpVq+bu4QEAUGLxWCAAwMXKlSsVExOjXbt2KTMzU9WrV9dTTz2lN954QzYb/00OAIArIVwBAAAAgAn4zhUAAAAAmIBwBQAAAAAm4OH5fDgcDh05ckTlypUr0A9mAgAAALg5GYahM2fOKDQ09Io/Cn8J4SofR44cYUUsAAAAAE6///67qlatetUawlU+ypUrJ+niB+jv7+/m0QAAAABwl4yMDFWrVs2ZEa6GcJWPS48C+vv7E64AAAAAFOjrQixoAQAAAAAmIFwBAAAAgAkIVwAAAABgAsIVAAAAAJiAcAUAAAAAJiBcAQAAAIAJCFcAAAAAYALCFQAAAACYgHAFAAAAACYgXAEAAACACQhXAAAAAGACwhUAAAAAmIBwBQAAAAAmIFwBAAAAgAkIVwAAAABgAreGq9jYWN11110qV66cKleurB49emjPnj3X3G/RokWqX7++vL291bhxYy1btsxlu2EYGj16tKpUqSIfHx9FRkZq3759xTUNAAAAAHBvuFq3bp0GDx6sH3/8UStXrtSFCxfUsWNHZWVlXXGfjRs3qk+fPho4cKC2bdumHj16qEePHtqxY4ez5t1339UHH3ygGTNmaNOmTfLz81NUVJTOnTt3I6YFAAAA4BZkMQzDcPcgLjl27JgqV66sdevW6b777su3plevXsrKytI333zjbLv77rsVHh6uGTNmyDAMhYaGavjw4XrllVckSadPn1ZwcLDi4uLUu3fva44jIyNDAQEBOn36tPz9/c2ZHAAAAIBSpzDZwHaDxlQgp0+fliRVqFDhijUJCQmKjo52aYuKitLixYslScnJyUpNTVVkZKRze0BAgFq1aqWEhIR8w1VOTo5ycnKc7zMyMiRJdrtddru9yPMBAAAAblbHjx/XmTNniq3/cuXKKSgoqNj6L6jC5IESE64cDoeGDRum1q1bq1GjRlesS01NVXBwsEtbcHCwUlNTndsvtV2p5nKxsbGKiYnJ056YmCg/P79CzQMAAAC42Z0/f167du3VhQuOYjtGmTJWNWhwuzw9PYvtGAVxta8sXa7EhKvBgwdrx44d2rBhww0/9siRI13uhmVkZKhatWpq0aIFjwUCAAAAl0lOTtZrr02Vl9dQ+fhUNb3/7Ow/lJMzVfPm3a+wsDDT+y+MS0+1FUSJCFdDhgzRN998o/Xr16tq1av/5YSEhCgtLc2lLS0tTSEhIc7tl9qqVKniUhMeHp5vn15eXvLy8srTbrPZZLOViI8IAAAAKDGsVqvs9lyVLVtdXl6
1Te/fbrcqKytXVqvV7dfjhTm+W1cLNAxDQ4YM0VdffaXVq1cXKJVGREQoPj7epW3lypWKiIiQJIWFhSkkJMSlJiMjQ5s2bXLWAAAAAIDZ3BoDBw8erM8//1xLlixRuXLlnN+JCggIkI+PjySpb9++uu222xQbGytJGjp0qNq2bauJEyeqa9euWrBggRITEzVz5kxJksVi0bBhwzR+/HjVrVtXYWFhevPNNxUaGqoePXq4ZZ4AAAAAbn5uDVcff/yxJKldu3Yu7bNnz1b//v0lSYcPH5bV+n832O655x59/vnn+sc//qFRo0apbt26Wrx4scsiGK+++qqysrL07LPPKj09XW3atNHy5cvl7e1d7HMCAAAAcGtya7gqyE9srV27Nk/bo48+qkcfffSK+1gsFr311lt66623rmd4AAAAAFBgbv3OFQAAAADcLAhXAAAAAGACwhUAAAAAmIBwBQAAAAAmIFwBAAAAgAkIVwAAAABgAsIVAAAAAJiAcAUAAAAAJiBcAQAAAIAJCFcAAAAAYALCFQAAAACYgHAFAAAAACYgXAEAAACACQhXAAAAAGACwhUAAAAAmIBwBQAAAAAmIFwBAAAAgAkIVwAAAABgAsIVAAAAAJiAcAUAAAAAJiBcAQAAAIAJCFcAAAAAYALCFQAAAACYgHAFAAAAACYgXAEAAACACQhXAAAAAGACwhUAAAAAmIBwBQAAAAAmIFwBAAAAgAkIVwAAAABgAsIVAAAAAJiAcAUAAAAAJiBcAQAAAIAJCFcAAAAAYALCFQAAAACYgHAFAAAAACYgXAEAAACACQhXAAAAAGACwhUAAAAAmIBwBQAAAAAmIFwBAAAAgAkIVwAAAABgAsIVAAAAAJiAcAUAAAAAJnBruFq/fr26deum0NBQWSwWLV68+Kr1/fv3l8ViyfNq2LChs2bs2LF5ttevX7+YZwIAAADgVufWcJWVlaWmTZtq+vTpBaqfOnWqUlJSnK/ff/9dFSpU0KOPPupS17BhQ5e6DRs2FMfwAQAAAMDJ5s6Dd+7cWZ07dy5wfUBAgAICApzvFy9erFOnTmnAgAEudTabTSEhIaaNEwAAAACuxa3h6np99tlnioyMVI0aNVza9+3bp9DQUHl7eysiIkKxsbGqXr36FfvJyclRTk6O831GRoYkyW63y263F8/gAQAAgFLK4XDIZvOQzeaQh4f518s228X+HQ6H26/HC3P8Uhuujhw5om+//Vaff/65S3urVq0UFxenevXqKSUlRTExMbr33nu1Y8cOlStXLt++YmNjFRMTk6c9MTFRfn5+xTJ+AAAAoLTKzs7W449HyWY7JA+Po6b3n5ubLbs9SocOHdLRo+b3XxhZWVkFrrUYhmEU41gKzGKx6KuvvlKPHj0KVB8bG6uJEyfqyJEj8vT0vGJdenq6atSooUmTJmngwIH51uR356patWo6ceKE/P39CzUPAAAA4GaXnJysJ54YocDA9+TrG2Z6/2fPJis9fYTmzXtPYWHm918YGRkZqlixok6fPn3NbFAq71wZhqFZs2bpqaeeumqwkqTAwEDdfvvt2r9//xVrvLy85OXllafdZrPJZiuVHxEAAABQbKxWq+z2XNntVuXmmn+9bLdf7N9qtbr9erwwxy+Vv3O1bt067d+//4p3ov4qMzNTBw4cUJUqVW7AyAAAAADcqtwarjIzM5WUlKSkpCRJF28vJiUl6fDhw5KkkSNHqm/fvnn2++yzz9SqVSs1atQoz7ZXXnlF69at08GDB7Vx40b17NlTHh4e6tOnT7HOBQAAAMCtza332BITE9W+fXvn++joaElSv379FBcXp5SUFGfQuuT06dP63//+p6lTp+bb5x9//KE+ffroxIkTqlSpktq0aaMff/xRlSpVKr6JAAAAALjluTVctWvXTldbTyMuLi5PW0BAgM6ePXvFfRYsWGDG0AAAAACgUErld64AAAAAoKQhXAEAAACACQhXAAAAAGACwhUAAAA
AmIBwBQAAAAAmIFwBAAAAgAkIVwAAAABgAsIVAAAAAJiAcAUAAAAAJiBcAQAAAIAJCFcAAAAAYALCFQAAAACYgHAFAAAAACYgXAEAAACACQhXAAAAAGACwhUAAAAAmIBwBQAAAAAmIFwBAAAAgAkIVwAAAABgAsIVAAAAAJiAcAUAAAAAJiBcAQAAAIAJCFcAAAAAYALCFQAAAACYgHAFAAAAACYgXAEAAACACQhXAAAAAGACwhUAAAAAmIBwBQAAAAAmIFwBAAAAgAkIVwAAAABgAsIVAAAAAJiAcAUAAAAAJiBcAQAAAIAJCFcAAAAAYALCFQAAAACYgHAFAAAAACYgXAEAAACACQhXAAAAAGACwhUAAAAAmIBwBQAAAAAmIFwBAAAAgAkIVwAAAABgAsIVAAAAAJjAreFq/fr16tatm0JDQ2WxWLR48eKr1q9du1YWiyXPKzU11aVu+vTpqlmzpry9vdWqVStt3ry5GGcBAAAAAG4OV1lZWWratKmmT59eqP327NmjlJQU56ty5crObQsXLlR0dLTGjBmjrVu3qmnTpoqKitLRo0fNHj4AAAAAONncefDOnTurc+fOhd6vcuXKCgwMzHfbpEmTNGjQIA0YMECSNGPGDC1dulSzZs3S66+/fj3DBQAAAIArcmu4Kqrw8HDl5OSoUaNGGjt2rFq3bi1JOn/+vLZs2aKRI0c6a61WqyIjI5WQkHDF/nJycpSTk+N8n5GRIUmy2+2y2+3FNAsAAACgdHI4HLLZPGSzOeThYf71ss12sX+Hw+H26/HCHL9UhasqVapoxowZatGihXJycvTpp5+qXbt22rRpk5o1a6bjx48rNzdXwcHBLvsFBwdr9+7dV+w3NjZWMTExedoTExPl5+dn+jwAAACA0iw7O1uPPx4lm+2QPDzM//pNbm627PYoHTp0yO1f78nKyipwbakKV/Xq1VO9evWc7++55x4dOHBAkydP1r///e8i9zty5EhFR0c732dkZKhatWpq0aKF/P39r2vMAAAAwM0mOTlZo0ZNU2BgpHx9w0zv/+zZZKWnT9O8eZEKCzO//8K49FRbQZSqcJWfli1basOGDZKkoKAgeXh4KC0tzaUmLS1NISEhV+zDy8tLXl5eedptNptstlL/EQEAAACmslqtsttzZbdblZtr/vWy3X6xf6vV6vbr8cIcv9T/zlVSUpKqVKkiSfL09FTz5s0VHx/v3O5wOBQfH6+IiAh3DREAAADALcCtMTAzM1P79+93vk9OTlZSUpIqVKig6tWra+TIkfrzzz81d+5cSdKUKVMUFhamhg0b6ty5c/r000+1evVqrVixwtlHdHS0+vXrpxYtWqhly5aaMmWKsrKynKsHAgAAAEBxcGu4SkxMVPv27Z3vL33vqV+/foqLi1NKSooOHz7s3H7+/HkNHz5cf/75p3x9fdWkSROtWrXKpY9evXrp2LFjGj16tFJTUxUeHq7ly5fnWeQCAAAAAMxkMQzDcPcgSpqMjAwFBATo9OnTLGgBAAAAXObAgQN69NFhCgycIj+/2qb3n5V1QOnpw7Ro0RTVrm1+/4VRmGxQ6r9zBQAAAAAlAeEKAAAAAExAuAIAAAAAExCuAAAAAMAEhCsAAAAAMAHhCgAAAABMQLgCAAAAABMQrgAAAADABIQrAAAAADAB4QoAAAAATEC4AgAAAAATEK4AAAAAwASEKwAAAAAwAeEKAAAAAExAuAIAAAAAExCuAAAAAMAEhCsAAAAAMAHhCgAAAABMQLgCAAAAABMQrgAAAADABIQrAAAAADAB4QoAAAAATEC4AgAAAAATEK4AAAAAwASEKwAAAAAwAeEKAAAAAExAuAIAAAAAExCuAAAAAMAEhCsAAAAAMAHhCgAAAABMQLgCAAAAABMQrgAAAADABIQrAAAAADAB4QoAAAAATEC4AgAAAAATEK4AAAAAwASEKwAAAAAwAeEKAAAAAExAuAIAAAAAExCuAAAAAMAEhCs
AAAAAMAHhCgAAAABMQLgCAAAAABMQrgAAAADABG4NV+vXr1e3bt0UGhoqi8WixYsXX7X+yy+/1AMPPKBKlSrJ399fERER+u6771xqxo4dK4vF4vKqX79+Mc4CAAAAANwcrrKystS0aVNNnz69QPXr16/XAw88oGXLlmnLli1q3769unXrpm3btrnUNWzYUCkpKc7Xhg0bimP4AAAAAOBkc+fBO3furM6dOxe4fsqUKS7v3377bS1ZskT/7//9P915553OdpvNppCQELOGCQAAAADX5NZwdb0cDofOnDmjChUquLTv27dPoaGh8vb2VkREhGJjY1W9evUr9pOTk6OcnBzn+4yMDEmS3W6X3W4vnsEDAAAApZTD4ZDN5iGbzSEPD/Ovl222i/07HA63X48X5vilOly9//77yszM1GOPPeZsa9WqleLi4lSvXj2lpKQoJiZG9957r3bs2KFy5crl209sbKxiYmLytCcmJsrPz6/Yxg8AAACURtnZ2Xr88SjZbIfk4XHU9P5zc7Nlt0fp0KFDOnrU/P4LIysrq8C1FsMwjGIcS4FZLBZ99dVX6tGjR4HqP//8cw0aNEhLlixRZGTkFevS09NVo0YNTZo0SQMHDsy3Jr87V9WqVdOJEyfk7+9fqHkAAAAAN7vk5GQ98cQIBQa+J1/fMNP7P3s2WenpIzRv3nsKCzO//8LIyMhQxYoVdfr06Wtmg1J552rBggV65plntGjRoqsGK0kKDAzU7bffrv3791+xxsvLS15eXnnabTabbLZS+REBAAAAxcZqtcpuz5XdblVurvnXy3b7xf6tVqvbr8cLc/xS9ztX8+fP14ABAzR//nx17dr1mvWZmZk6cOCAqlSpcgNGBwAAAOBW5dYYmJmZ6XJHKTk5WUlJSapQoYKqV6+ukSNH6s8//9TcuXMlXXwUsF+/fpo6dapatWql1NRUSZKPj48CAgIkSa+88oq6deumGjVq6MiRIxozZow8PDzUp0+fGz9BAAAAALcMt965SkxM1J133ulcRj06Olp33nmnRo8eLUlKSUnR4cOHnfUzZ86U3W7X4MGDVaVKFedr6NChzpo//vhDffr0Ub169fTYY4+pYsWK+vHHH1WpUqUbOzkAAAAAtxS33rlq166drraeRlxcnMv7tWvXXrPPBQsWXOeoAAAAAKDwSt13rgAAAACgJCJcAQAAAIAJCFcAAAAAYALCFQAAAACYoEjh6rfffjN7HAAAAABQqhUpXNWpU0ft27fXf/7zH507d87sMQEAAABAqVOkcLV161Y1adJE0dHRCgkJ0XPPPafNmzebPTYAAAAAKDWKFK7Cw8M1depUHTlyRLNmzVJKSoratGmjRo0aadKkSTp27JjZ4wQAAACAEu26FrSw2Wx6+OGHtWjRIr3zzjvav3+/XnnlFVWrVk19+/ZVSkqKWeMEAAAAgBLtusJVYmKiXnzxRVWpUkWTJk3SK6+8ogMHDmjlypU6cuSIunfvbtY4AQAAAKBEsxVlp0mTJmn27Nnas2ePunTporlz56pLly6yWi9mtbCwMMXFxalmzZpmjhUAAAAASqwihauPP/5YTz/9tPr3768qVarkW1O5cmV99tln1zU4AAAAACgtihSu9u3bd80aT09P9evXryjdAwAAAECpU6TvXM2ePVuLFi3K075o0SLNmTPnugcFAAAAAKVNkcJVbGysgoKC8rRXrlxZb7/99nUPCgAAAABKmyKFq8OHDyssLCxPe40aNXT48OHrHhQAAAAAlDZFCleVK1fW9u3b87T//PPPqlix4nUPCgAAAABKmyKFqz59+ujll1/WmjVrlJubq9zcXK1evVpDhw5V7969zR4jAAAAAJR4RVotcNy4cTp48KA6dOggm+1iFw6HQ3379uU7VwAAAABuSUUKV56enlq4cKHGjRunn3/+WT4+PmrcuLFq1Khh9vgAAAAAoFQoUri65Pbbb9ftt99u1lgAAAAAoNQqUrjKzc1VXFy
c4uPjdfToUTkcDpftq1evNmVwAAAAAFBaFClcDR06VHFxceratasaNWoki8Vi9rgAAAAAoFQpUrhasGCB/vvf/6pLly5mjwcAAAAASqUiLcXu6empOnXqmD0WAAAAACi1ihSuhg8frqlTp8owDLPHAwAAAAClUpEeC9ywYYPWrFmjb7/9Vg0bNlSZMmVctn/55ZemDA4AAAAASosihavAwED17NnT7LEAAAAAQKlVpHA1e/Zss8cBAAAAAKVakb5zJUl2u12rVq3SJ598ojNnzkiSjhw5oszMTNMGBwAAAAClRZHuXB06dEidOnXS4cOHlZOTowceeEDlypXTO++8o5ycHM2YMcPscQIAAABAiVakO1dDhw5VixYtdOrUKfn4+Djbe/bsqfj4eNMGBwAAAAClRZHuXH3//ffauHGjPD09Xdpr1qypP//805SBAQAAAEBpUqQ7Vw6HQ7m5uXna//jjD5UrV+66BwUAAAAApU2RwlXHjh01ZcoU53uLxaLMzEyNGTNGXbp0MWtsAAAAAFBqFOmxwIkTJyoqKkoNGjTQuXPn9Pjjj2vfvn0KCgrS/PnzzR4jAAAAAJR4RQpXVatW1c8//6wFCxZo+/btyszM1MCBA/XEE0+4LHABAAAAALeKIoUrSbLZbHryySfNHAsAAAAAlFpFCldz58696va+ffsWaTAAAAAAUFoVKVwNHTrU5f2FCxd09uxZeXp6ytfXl3AFAAAA4JZTpNUCT5065fLKzMzUnj171KZNGxa0AAAAAHBLKlK4yk/dunU1YcKEPHe1AAAAAOBWYFq4ki4ucnHkyBEzuwQAAACAUqFI37n6+uuvXd4bhqGUlBRNmzZNrVu3NmVgAAAAAFCaFOnOVY8ePVxeDz/8sMaOHasmTZpo1qxZBe5n/fr16tatm0JDQ2WxWLR48eJr7rN27Vo1a9ZMXl5eqlOnjuLi4vLUTJ8+XTVr1pS3t7datWqlzZs3F2J2AAAAAFB4RQpXDofD5ZWbm6vU1FR9/vnnqlKlSoH7ycrKUtOmTTV9+vQC1ScnJ6tr165q3769kpKSNGzYMD3zzDP67rvvnDULFy5UdHS0xowZo61bt6pp06aKiorS0aNHCz1PAAAAACioIv+IsBk6d+6szp07F7h+xowZCgsL08SJEyVJd9xxhzZs2KDJkycrKipKkjRp0iQNGjRIAwYMcO6zdOlSzZo1S6+//rr5kwAAAAAAFTFcRUdHF7h20qRJRTlEvhISEhQZGenSFhUVpWHDhkmSzp8/ry1btmjkyJHO7VarVZGRkUpISLhivzk5OcrJyXG+z8jIkCTZ7XbZ7XbTxl9Ux48f15kzZ4qt/3LlyikoKKjY+gcAAMDNxeFwyGbzkM3mkIeH+dfLNtvF/h0Oh9uvxwtz/CKFq23btmnbtm26cOGC6tWrJ0nau3evPDw81KxZM2edxWIpSvdXlJqaquDgYJe24OBgZWRkKDs7W6dOnVJubm6+Nbt3775iv7GxsYqJicnTnpiYKD8/P3MGX0Tnz5/Xrl17deGCo9iOUaaMVQ0a3C5PT89iOwYAAABuHtnZ2Xr88SjZbIfk4WH+129yc7Nlt0fp0KFDbv96T1ZWVoFrixSuunXrpnLlymnOnDkqX768pIs/LDxgwADde++9Gj58eFG6dZuRI0e63I3LyMhQtWrV1KJFC/n7+7txZBe/Z/baa1Pl5TVUPj5VTe8/O/sP5eRM1bx59yssLMz0/gEAAHDzSU5O1qhR0xQYGClfX/OvIc+eTVZ6+jTNmxfp9mvUS0+1FUSRwtXEiRO1YsUKZ7CSpPLly2v8+PHq2LFjsYWrkJAQpaWlubSlpaXJ399fPj4+8vDwkIeHR741ISEhV+zXy8tLXl5eedptNptsNrd+LU1Wq1V2e67Klq0uL6/apvdvt1uVlZUrq9Xq9rkCAACgdLh0jWq3W5Wba/41pN1+sf+ScI1amOMXabXAjIwMHTt2LE/7sWPHivW7QRE
REYqPj3dpW7lypSIiIiRJnp6eat68uUuNw+FQfHy8swYAAAAAikORwlXPnj01YMAAffnll/rjjz/0xx9/6H//+58GDhyohx9+uMD9ZGZmKikpSUlJSZIu3l5MSkrS4cOHJV18XK9v377O+ueff16//fabXn31Ve3evVsfffSR/vvf/+rvf/+7syY6Olr/+te/NGfOHP3666964YUXlJWV5Vw9EAAAAACKQ5Husc2YMUOvvPKKHn/8cV24cOFiRzabBg4cqPfee6/A/SQmJqp9+/bO95e+99SvXz/FxcUpJSXFGbQkKSwsTEuXLtXf//53TZ06VVWrVtWnn37qXIZdknr16qVjx45p9OjRSk1NVXh4uJYvX55nkQsAAAAAMFORwpWvr68++ugjvffeezpw4IAkqXbt2oVeWa9du3YyDOOK2+Pi4vLdZ9u2bVftd8iQIRoyZEihxgIAAAAA16NIjwVekpKSopSUFNWtW1d+fn5XDUoAAAAAcDMrUrg6ceKEOnTooNtvv11dunRRSkqKJGngwIGlbhl2AAAAADBDkcLV3//+d5UpU0aHDx+Wr6+vs71Xr15avny5aYMDAAAAgNKiSN+5WrFihb777jtVrer6o7Z169bVoUOHTBkYAAAAAJQmRbpzlZWV5XLH6pKTJ0/m+2O8AAAAAHCzK1K4uvfeezV37lzne4vFIofDoXfffddlaXUAAAAAuFUU6bHAd999Vx06dFBiYqLOnz+vV199VTt37tTJkyf1ww8/mD1GAAAAACjxinTnqlGjRtq7d6/atGmj7t27KysrSw8//LC2bdum2rVrmz1GAAAAACjxCn3n6sKFC+rUqZNmzJihN954ozjGBAAAAAClTqHvXJUpU0bbt28vjrEAAAAAQKlVpMcCn3zySX322WdmjwUAAAAASq0iLWhht9s1a9YsrVq1Ss2bN5efn5/L9kmTJpkyOAAAAAAoLQoVrn777TfVrFlTO3bsULNmzSRJe/fudamxWCzmjQ4AAAAASolChau6desqJSVFa9askST16tVLH3zwgYKDg4tlcAAAAABQWhTqO1eGYbi8//bbb5WVlWXqgAAAAACgNCrSghaXXB62AAAAAOBWVahwZbFY8nyniu9YAQAAAEAhv3NlGIb69+8vLy8vSdK5c+f0/PPP51kt8MsvvzRvhAAAAABQChQqXPXr18/l/ZNPPmnqYAAAAACgtCpUuJo9e3ZxjQMAAAAASrXrWtACAAAAAHAR4QoAAAAATEC4AgAAAAATEK4AAAAAwASEKwAAAAAwAeEKAAAAAExAuAIAAAAAExCuAAAAAMAEhCsAAAAAMAHhCgAAAABMQLgCAAAAABMQrgAAAADABIQrAAAAADAB4QoAAAAATEC4AgAAAAATEK4AAAAAwASEKwAAAAAwAeEKAAAAAExAuAIAAAAAExCuAAAAAMAEhCsAAAAAMAHhCgAAAABMQLgCAAAAABMQrgAAAADABCUiXE2fPl01a9aUt7e3WrVqpc2bN1+xtl27drJYLHleXbt2ddb0798/z/ZOnTrdiKkAAAAAuEXZ3D2AhQsXKjo6WjNmzFCrVq00ZcoURUVFac+ePapcuXKe+i+//FLnz593vj9x4oSaNm2qRx991KWuU6dOmj17tvO9l5dX8U0CAAAAwC3P7XeuJk2apEGDBmnAgAFq0KCBZsyYIV9fX82aNSvf+goVKigkJMT5WrlypXx9ffOEKy8vL5e68uXL34jpAAAAALhFufXO1fnz57VlyxaNHDnS2Wa1WhUZGamEhIQC9fHZZ5+pd+/e8vPzc2lfu3atKleurPLly+v+++/X+PHjVbFixXz7yMnJUU5OjvN9RkaGJMlut8tutxd2WqZyOByy2Txksznk4WH+WGy2i/07HA63zxUAAAClw610jVqY47s1XB0/fly5ubkKDg52aQ8ODtbu3buvuf/mzZu1Y8cOffbZZy7tnTp10sMPP6ywsDAdOHBAo0aNUuf
OnZWQkCAPD488/cTGxiomJiZPe2JiYp7QdqNlZ2fr8cejZLMdkofHUdP7z83Nlt0epUOHDunoUfP7BwAAwM3nVrpGzcrKKnCt279zdT0+++wzNW7cWC1btnRp7927t/PPjRs3VpMmTVS7dm2tXbtWHTp0yNPPyJEjFR0d7XyfkZGhatWqqUWLFvL39y++CRRAcnKyRo2apsDASPn6hpne/9mzyUpPn6Z58yIVFmZ+/wAAALj53ErXqJeeaisIt4aroKAgeXh4KC0tzaU9LS1NISEhV903KytLCxYs0FtvvXXN49SqVUtBQUHav39/vuHKy8sr3wUvbDabbDb35k+r1Sq7PVd2u1W5ueaPxW6/2L/VanX7XAEAAFA63ErXqIU5vlsXtPD09FTz5s0VHx/vbHM4HIqPj1dERMRV9120aJFycnL05JNPXvM4f/zxh06cOKEqVapc95gBAAAAID9uXy0wOjpa//rXvzRnzhz9+uuveuGFF5SVlaUBAwZIkvr27euy4MUln332mXr06JFnkYrMzEyNGDFCP/74ow4ePKj4+Hh1795dderUUVRU1A2ZEwAAAIBbj9ufA+vVq5eOHTum0aNHKzU1VeHh4Vq+fLlzkYvDhw/LanXNgHv27NGGDRu0YsWKPP15eHho+/btmjNnjtLT0xUaGqqOHTtq3Lhx/NYVAAAAgGLj9nAlSUOGDNGQIUPy3bZ27do8bfXq1ZNhGPnW+/j46LvvvjNzeAAAAABwTW5/LBAAAAAAbgaEKwAAAAAwAeEKAAAAAExAuAIAAAAAExCuAAAAAMAEhCsAAAAAMAHhCgAAAABMQLgCAAAAABMQrgAAAADABIQrAAAAADAB4QoAAAAATEC4AgAAAAATEK4AAAAAwASEKwAAAAAwAeEKAAAAAExAuAIAAAAAExCuAAAAAMAEhCsAAAAAMAHhCgAAAABMQLgCAAAAABMQrgAAAADABIQrAAAAADAB4QoAAAAATEC4AgAAAAATEK4AAAAAwASEKwAAAAAwAeEKAAAAAExAuAIAAAAAExCuAAAAAMAEhCsAAAAAMAHhCgAAAABMQLgCAAAAABMQrgAAAADABIQrAAAAADAB4QoAAAAATEC4AgAAAAATEK4AAAAAwASEKwAAAAAwAeEKAAAAAExAuAIAAAAAExCuAAAAAMAEhCsAAAAAMAHhCgAAAABMQLgCAAAAABOUiHA1ffp01axZU97e3mrVqpU2b958xdq4uDhZLBaXl7e3t0uNYRgaPXq0qlSpIh8fH0VGRmrfvn3FPQ0AAAAAtzC3h6uFCxcqOjpaY8aM0datW9W0aVNFRUXp6NGjV9zH399fKSkpztehQ4dctr/77rv64IMPNGPGDG3atEl+fn6KiorSuXPnins6AAAAAG5Rbg9XkyZN0qBBgzRgwAA1aNBAM2bMkK+vr2bNmnXFfSwWi0JCQpyv4OBg5zbDMDRlyhT94x//UPfu3dWkSRPNnTtXR44c0eLFi2/AjAAAAADcimzuPPj58+e1ZcsWjRw50tlmtVoVGRmphISEK+6XmZmpGjVqyOFwqFmzZnr77bfVsGFDSVJycrJSU1MVGRnprA8ICFCrVq2UkJCg3r175+kvJydHOTk5zvcZGRmSJLvdLrvdft3zvB4Oh0M2m4dsNoc8PMwfi812sX+Hw+H2uQIAAKB0uJWuUQtzfLeGq+PHjys3N9flzpMkBQcHa/fu3fnuU69ePc2aNUtNmjTR6dOn9f777+uee+7Rzp07VbVqVaWmpjr7uLzPS9suFxsbq5iYmDztiYmJ8vPzK8rUTJOdna3HH4+SzXZIHh5XflSyqHJzs2W3R+nQoUNXfRQTAAAAuORWukbNysoqcK1bw1VRREREKCIiwvn+nnvu0R133KFPPvlE48aNK1KfI0eOVHR0tPN9RkaGqlWrphYtWsjf3/+6x3w9kpOTNWrUNAUGRsrXN8z0/s+eTVZ6+jTNmxepsDDz+wcAAMDN51a
6Rr30VFtBuDVcBQUFycPDQ2lpaS7taWlpCgkJKVAfZcqU0Z133qn9+/dLknO/tLQ0ValSxaXP8PDwfPvw8vKSl5dXnnabzSabzb3502q1ym7Pld1uVW6u+WOx2y/2b7Va3T5XAAAAlA630jVqYY7v1gUtPD091bx5c8XHxzvbHA6H4uPjXe5OXU1ubq5++eUXZ5AKCwtTSEiIS58ZGRnatGlTgfsEAAAAgMJy+62K6Oho9evXTy1atFDLli01ZcoUZWVlacCAAZKkvn376rbbblNsbKwk6a233tLdd9+tOnXqKD09Xe+9954OHTqkZ555RtLFlQSHDRum8ePHq27dugoLC9Obb76p0NBQ9ejRw13TBAAAAHCTc3u46tWrl44dO6bRo0crNTVV4eHhWr58uXNBisOHD8tq/b8bbKdOndKgQYOUmpqq8uXLq3nz5tq4caMaNGjgrHn11VeVlZWlZ599Vunp6WrTpo2WL1+e58eGAQAAAMAsbg9XkjRkyBANGTIk321r1651eT958mRNnjz5qv1ZLBa99dZbeuutt8waIgAAAABcldt/RBgAAAAAbgaEKwAAAAAwAeEKAAAAAExAuAIAAAAAExCuAAAAAMAEhCsAAAAAMAHhCgAAAABMQLgCAAAAABMQrgAAAADABIQrAAAAADAB4QoAAAAATEC4AgAAAAATEK4AAAAAwASEKwAAAAAwAeEKAAAAAExAuAIAAAAAExCuAAAAAMAEhCsAAAAAMAHhCgAAAABMQLgCAAAAABMQrgAAAADABIQrAAAAADAB4QoAAAAATEC4AgAAAAATEK4AAAAAwASEKwAAAAAwAeEKAAAAAExAuAIAAAAAExCuAAAAAMAEhCsAAAAAMAHhCgAAAABMQLgCAAAAABMQrgAAAADABIQrAAAAADAB4QoAAAAATEC4AgAAAAATEK4AAAAAwASEKwAAAAAwAeEKAAAAAExAuAIAAAAAExCuAAAAAMAEhCsAAAAAMAHhCgAAAABMQLgCAAAAABOUiHA1ffp01axZU97e3mrVqpU2b958xdp//etfuvfee1W+fHmVL19ekZGReer79+8vi8Xi8urUqVNxTwMAAADALczt4WrhwoWKjo7WmDFjtHXrVjVt2lRRUVE6evRovvVr165Vnz59tGbNGiUkJKhatWrq2LGj/vzzT5e6Tp06KSUlxfmaP3/+jZgOAAAAgFuU28PVpEmTNGjQIA0YMEANGjTQjBkz5Ovrq1mzZuVbP2/ePL344osKDw9X/fr19emnn8rhcCg+Pt6lzsvLSyEhIc5X+fLlb8R0AAAAANyibO48+Pnz57VlyxaNHDnS2Wa1WhUZGamEhIQC9XH27FlduHBBFSpUcGlfu3atKleurPLly+v+++/X+PHjVbFixXz7yMnJUU5OjvN9RkaGJMlut8tutxd2WqZyOByy2Txksznk4WH+WGy2i/07HA63zxUAAAClw610jVqY47s1XB0/fly5ubkKDg52aQ8ODtbu3bsL1Mdrr72m0NBQRUZGOts6deqkhx9+WGFhYTpw4IBGjRqlzp07KyEhQR4eHnn6iI2NVUxMTJ72xMRE+fn5FXJW5srOztbjj0fJZjskD4/8H5W8Hrm52bLbo3To0KErPooJAAAA/NWtdI2alZVV4Fq3hqvrNWHCBC1YsEBr166Vt7e3s713797OPzdu3FhNmjRR7dq1tXbtWnXo0CFPPyNHjlR0dLTzfUZGhqpVq6YWLVrI39+/eCdxDcnJyRo1apoCAyPl6xtmev9nzyYrPX2a5s2LVFiY+f0DAADg5nMrXaNeeqqtINwaroKCguTh4aG0tDSX9rS0NIWEhFx13/fff18TJkzQqlWr1KRJk6vW1qpVS0FBQdq/f3++4crLy0teXl552m02m2w29+ZPq9Uquz1XdrtVubnmj8Vuv9i/1Wp1+1wBAABQOtxK16iFOb5bF7Tw9PRU8+bNXRajuLQ4RURExBX3e/fddzV
u3DgtX75cLVq0uOZx/vjjD504cUJVqlQxZdwAAAAAcDm3rxYYHR2tf/3rX5ozZ45+/fVXvfDCC8rKytKAAQMkSX379nVZ8OKdd97Rm2++qVmzZqlmzZpKTU1VamqqMjMzJUmZmZkaMWKEfvzxRx08eFDx8fHq3r276tSpo6ioKLfMEQAAAMDNz+3PgfXq1UvHjh3T6NGjlZqaqvDwcC1fvty5yMXhw4dltf5fBvz44491/vx5PfLIIy79jBkzRmPHjpWHh4e2b9+uOXPmKD09XaGhoerYsaPGjRuX76N/AAAAAGAGt4crSRoyZIiGDBmS77a1a9e6vD948OBV+/Lx8dF3331n0sgAAAAAoGDc/lggAAAAANwMCFcAAAAAYALCFQAAAACYgHAFAAAAACYgXAEAAACACQhXAAAAAGACwhUAAAAAmIBwBQAAAAAmIFwBAAAAgAkIVwAAAABgAsIVAAAAAJiAcAUAAAAAJiBcAQAAAIAJCFcAAAAAYALCFQAAAACYgHAFAAAAACYgXAEAAACACQhXAAAAAGACwhUAAAAAmIBwBQAAAAAmIFwBAAAAgAkIVwAAAABgAsIVAAAAAJiAcAUAAAAAJiBcAQAAAIAJCFcAAAAAYALCFQAAAACYgHAFAAAAACYgXAEAAACACQhXAAAAAGACwhUAAAAAmIBwBQAAAAAmIFwBAAAAgAkIVwAAAABgAsIVAAAAAJiAcAUAAAAAJiBcAQAAAIAJCFcAAAAAYALCFQAAAACYgHAFAAAAACYgXAEAAACACQhXAAAAAGACwhUAAAAAmIBwBQAAAAAmKBHhavr06apZs6a8vb3VqlUrbd68+ar1ixYtUv369eXt7a3GjRtr2bJlLtsNw9Do0aNVpUoV+fj4KDIyUvv27SvOKQAAAAC4xbk9XC1cuFDR0dEaM2aMtm7dqqZNmyoqKkpHjx7Nt37jxo3q06ePBg4cqG3btqlHjx7q0aOHduzY4ax599139cEHH2jGjBnatGmT/Pz8FBUVpXPnzt2oaQEAAAC4xbg9XE2aNEmDBg3SgAED1KBBA82YMUO+vr6aNWtWvvVTp05Vp06dNGLECN1xxx0aN26cmjVrpmnTpkm6eNdqypQp+sc//qHu3burSZMmmjt3ro4cOaLFixffwJkBAAAAuJXY3Hnw8+fPa8uWLRo5cqSzzWq1KjIyUgkJCfnuk5CQoOjoaJe2qKgoZ3BKTk5WamqqIiMjndsDAgLUqlUrJSQkqHfv3nn6zMnJUU5OjvP96dOnJUknT56U3W4v8vzMkJGRIYvFoezsXyVlmN5/dvafcjhytHPnTmVkmN8/AAAAbj6///67HI4LxXqNarE4lJGRoZMnT5ref2FcukY2DOOatW4NV8ePH1dubq6Cg4Nd2oODg7V79+5890lNTc23PjU11bn9UtuVai4XGxurmJiYPO1hYWEFm8gNsezaJdehe/eVxdo/AAAAbkbfFWvvzZoV7zVwYZw5c0YBAQFXrXFruCopRo4c6XI3zOFw6OTJk6pYsaIsFouki4m1WrVq+v333+Xv7++uoeIWwfmGG4nzDTcS5xtuJM43mMEwDJ05c0ahoaHXrHVruAoKCpKHh4fS0tJc2tPS0hQSEpLvPiEhIVetv/S/aWlpqlKliktNeHh4vn16eXnJy8vLpS0wMDDfWn9/f/7hxA3D+YYbifMNNxLnG24kzjdcr2vdsbrErQtaeHp6qnnz5oqPj3e2ORwOxcfHKyIiIt99IiIiXOolaeXKlc76sLAwhYSEuNRkZGRo06ZNV+wTAAAAAK6X2x8LjI6OVr9+/dSiRQu1bNlSU6ZMUVZWlgYMGCBJ6tu3r2677TbFxsZKkoYOHaq2bdtq4sSJ6tq1qxYsWKDExETNnDlTkmSxWDRs2DCNHz9edevWVVhYmN58802FhoaqR48e7pomAAAAgJuc28NVr169dOzYMY0ePVqpqakKDw/X8uXLnQtSHD58WFbr/91gu+e
ee/T555/rH//4h0aNGqW6detq8eLFatSokbPm1VdfVVZWlp599lmlp6erTZs2Wr58uby9vYs8Ti8vL40ZMybP44NAceB8w43E+YYbifMNNxLnG240i1GQNQUBAAAAAFfl9h8RBgAAAICbAeEKAAAAAExAuAIAAAAAExCuAAAAAMAEhKsCmj59umrWrClvb2+1atVKmzdvdveQUMKtX79e3bp1U2hoqCwWixYvXuyy3TAMjR49WlWqVJGPj48iIyO1b98+l5qTJ0/qiSeekL+/vwIDAzVw4EBlZma61Gzfvl333nuvvL29Va1aNb377rvFPTWUQLGxsbrrrrtUrlw5Va5cWT169NCePXtcas6dO6fBgwerYsWKKlu2rP72t7/l+VH2w4cPq2vXrvL19VXlypU1YsQI2e12l5q1a9eqWbNm8vLyUp06dRQXF1fc00MJ8/HHH6tJkybOH2aNiIjQt99+69zOuYbiMmHCBOfP7lzC+YYSxcA1LViwwPD09DRmzZpl7Ny50xg0aJARGBhopKWluXtoKMGWLVtmvPHGG8aXX35pSDK++uorl+0TJkwwAgICjMWLFxs///yz8dBDDxlhYWFGdna2s6ZTp05G06ZNjR9//NH4/vvvjTp16hh9+vRxbj99+rQRHBxsPPHEE8aOHTuM+fPnGz4+PsYnn3xyo6aJEiIqKsqYPXu2sWPHDiMpKcno0qWLUb16dSMzM9NZ8/zzzxvVqlUz4uPjjcTEROPuu+827rnnHud2u91uNGrUyIiMjDS2bdtmLFu2zAgKCjJGjhzprPntt98MX19fIzo62ti1a5fx4YcfGh4eHsby5ctv6HzhXl9//bWxdOlSY+/evcaePXuMUaNGGWXKlDF27NhhGAbnGorH5s2bjZo1axpNmjQxhg4d6mznfENJQrgqgJYtWxqDBw92vs/NzTVCQ0ON2NhYN44Kpcnl4crhcBghISHGe++952xLT083vLy8jPnz5xuGYRi7du0yJBk//fSTs+bbb781LBaL8eeffxqGYRgfffSRUb58eSMnJ8dZ89prrxn16tUr5hmhpDt69KghyVi3bp1hGBfPrzJlyhiLFi1y1vz666+GJCMhIcEwjIv/QcBqtRqpqanOmo8//tjw9/d3nmOvvvqq0bBhQ5dj9erVy4iKiiruKaGEK1++vPHpp59yrqFYnDlzxqhbt66xcuVKo23bts5wxfmGkobHAq/h/Pnz2rJliyIjI51tVqtVkZGRSkhIcOPIUJolJycrNTXV5bwKCAhQq1atnOdVQkKCAgMD1aJFC2dNZGSkrFarNm3a5Ky577775Onp6ayJiorSnj17dOrUqRs0G5REp0+fliRVqFBBkrRlyxZduHDB5ZyrX7++qlev7nLONW7c2Pkj7tLF8ykjI0M7d+501vy1j0s1/Pvw1pWbm6sFCxYoKytLERERnGsoFoMHD1bXrl3znBOcbyhpbO4eQEl3/Phx5ebmuvwDKUnBwcHavXu3m0aF0i41NVWS8j2vLm1LTU1V5cqVXbbbbDZVqFDBpSYsLCxPH5e2lS9fvljGj5LN4XBo2LBhat26tRo1aiTp4vng6empwMBAl9rLz7n8zslL265Wk5GRoezsbPn4+BTHlFAC/fLLL4qIiNC5c+dUtmxZffXVV2rQoIGSkpI412CqBQsWaOvWrfrpp5/ybOPfbShpCFcAcJMZPHiwduzYoQ0bNrh7KLiJ1atXT0lJSTp9+rS++OIL9evXT+vWrXP3sHCT+f333zV06FCtXLlS3t7e7h4OcE08FngNQUFB8vDwyLPqTFpamkJCQtw0KpR2l86dq51XISEhOnr0qMt2u92ukydPutTk18dfj4Fby5AhQ/TNN99ozZo1qlq1qrM9JCRE58+fV3p6ukv95efctc6nK9X4+/vzX3ZvMZ6enqpTp46aN2+u2NhYNW3aVFOnTuVcg6m2bNmio0ePqlmzZrLZbLLZbFq3bp0++OAD2Ww2BQc
Hc76hRCFcXYOnp6eaN2+u+Ph4Z5vD4VB8fLwiIiLcODKUZmFhYQoJCXE5rzIyMrRp0ybneRUREaH09HRt2bLFWbN69Wo5HA61atXKWbN+/XpduHDBWbNy5UrVq1ePRwJvMYZhaMiQIfrqq6+0evXqPI+LNm/eXGXKlHE55/bs2aPDhw+7nHO//PKLS6hfuXKl/P391aBBA2fNX/u4VMO/D+FwOJSTk8O5BlN16NBBv/zyi5KSkpyvFi1a6IknnnD+mfMNJYq7V9QoDRYsWGB4eXkZcXFxxq5du4xnn33WCAwMdFl1BrjcmTNnjG3bthnbtm0zJBmTJk0ytm3bZhw6dMgwjItLsQcGBhpLliwxtm/fbnTv3j3fpdjvvPNOY9OmTcaGDRuMunXruizFnp6ebgQHBxtPPfWUsWPHDmPBggWGr68vS7Hfgl544QUjICDAWLt2rZGSkuJ8nT171lnz/PPPG9WrVzdWr15tJCYmGhEREUZERIRz+6Xlijt27GgkJSUZy5cvNypVqpTvcsUjRowwfv31V2P69OksV3wLev31141169YZycnJxvbt243XX3/dsFgsxooVKwzD4FxD8frraoGGwfmGkoVwVUAffvihUb16dcPT09No2bKl8eOPP7p7SCjh1qxZY0jK8+rXr59hGBeXY3/zzTeN4OBgw8vLy+jQoYOxZ88elz5OnDhh9OnTxyhbtqzh7+9vDBgwwDhz5oxLzc8//2y0adPG8PLyMm677TZjwoQJN2qKKEHyO9ckGbNnz3bWZGdnGy+++KJRvnx5w9fX1+jZs6eRkpLi0s/BgweNzp07Gz4+PkZQUJAxfPhw48KFCy41a9asMcLDww1PT0+jVq1aLsfAreHpp582atSoYXh6ehqVKlUyOnTo4AxWhsG5huJ1ebjifENJYjEMw3DPPTMAAAAAuHnwnSsAAAAAMAHhCgAAAABMQLgCAAAAABMQrgAAAADABIQrAAAAADAB4QoAAAAATEC4AgAAAAATEK4AAAAAwASEKwBAqXLw4EFZLBYlJSW5eygAALggXAEAbjiLxXLV19ixY909xHzt379fAwYMUNWqVeXl5aWwsDD16dNHiYmJN3QcBEwAKJls7h4AAODWk5KS4vzzwoULNXr0aO3Zs8fZVrZsWXcM66oSExPVoUMHNWrUSJ988onq16+vM2fOaMmSJRo+fLjWrVvn7iECANyMO1cAgBsuJCTE+QoICJDFYnG+r1y5siZNmuS8OxQeHq7ly5dfsa/c3Fw9/fTTql+/vg4fPixJWrJkiZo1ayZvb2/VqlVLMTExstvtzn0sFos+/fRT9ezZU76+vqpbt66+/vrrKx7DMAz1799fdevW1ffff6+uXbuqdu3aCg8P15gxY7RkyRJn7S+//KL7779fPj4+qlixop599lllZmY6t7dr107Dhg1z6b9Hjx7q37+/833NmjX19ttv6+mnn1a5cuVUvXp1zZw507k9LCxMknTnnXfKYrGoXbt2V/28AQA3BuEKAFCiTJ06VRMnTtT777+v7du3KyoqSg899JD27duXpzYnJ0ePPvqokpKS9P3336t69er6/vvv1bdvXw0dOlS7du3SJ598ori4OP3zn/902TcmJkaPPfaYtm/fri5duuiJJ57QyZMn8x1TUlKSdu7cqeHDh8tqzft/nYGBgZKkrKwsRUVFqXz58vrpp5+0aNEirVq1SkOGDCn05zBx4kS1aNFC27Zt04svvqgXXnjBeXdv8+bNkqRVq1YpJSVFX375ZaH7BwCYj3AFAChR3n//fb322mvq3bu36tWrp3feeUfh4eGaMmWKS11mZqa6du2qY8eOac2aNapUqZKki6Hp9ddfV79+/VSrVi098MADGjdunD755BOX/fv3768+ffqoTp06evvtt5WZmekMLZe7FOzq169/1bF//vnnOnfunObOnatGjRrp/vvv17Rp0/Tvf/9baWlphfocunTpohdffFF16tTRa6+
# --- Cell: load source pages and inspect token counts -------------------------
import matplotlib.pyplot as plt
import os
import tiktoken
from bs4 import BeautifulSoup as Soup
from langchain_community.document_loaders.recursive_url_loader import RecursiveUrlLoader


def num_tokens_from_string(string: str, encoding_name: str) -> int:
    """Returns the number of tokens in a text string."""
    encoding = tiktoken.get_encoding(encoding_name)
    return len(encoding.encode(string))


def load_url_docs(url: str, max_depth: int = 1) -> list:
    """Load `url` with RecursiveUrlLoader, reducing each fetched HTML page to its plain text.

    Parameters:
    - url: The page to start loading from.
    - max_depth: Passed through to RecursiveUrlLoader (1 is the value this notebook uses).

    Returns:
    - The list of documents produced by `loader.load()`.
    """
    loader = RecursiveUrlLoader(
        url=url, max_depth=max_depth, extractor=lambda x: Soup(x, "html.parser").text
    )
    return loader.load()


# Pages that make up the corpus; load order is preserved in `docs`.
SOURCE_URLS = [
    "https://www.sciencedirect.com/science/article/pii/S135964462400117X",
    "https://www.chemicalqdevice.com/cancer-drug-discovery-innovation",
    "https://www.chemicalqdevice.com/cancer-drug-discovery-ai",
]

# Doc texts
docs = [doc for url in SOURCE_URLS for doc in load_url_docs(url)]
docs_texts = [d.page_content for d in docs]

# Calculate the number of tokens for each document
counts = [num_tokens_from_string(d, "cl100k_base") for d in docs_texts]

# Plotting the histogram of token counts
plt.figure(figsize=(10, 6))
plt.hist(counts, bins=30, color="blue", edgecolor="black", alpha=0.7)
plt.title("Histogram of Token Counts")
plt.xlabel("Token Count")
plt.ylabel("Frequency")
plt.grid(axis="y", alpha=0.75)

# Display the histogram (the call parentheses matter: a bare `plt.show` only
# evaluates to the function object and never renders explicitly)
plt.show()

# --- Cell: concatenate all documents ------------------------------------------
# Doc texts concat: documents ordered by source URL, descending
d_sorted = sorted(docs, key=lambda x: x.metadata["source"])
d_reversed = list(reversed(d_sorted))
concatenated_content = "\n\n\n --- \n\n\n".join(
    [doc.page_content for doc in d_reversed]
)
print(
    "Num tokens in all context: %s"
    % num_tokens_from_string(concatenated_content, "cl100k_base")
)

# --- Cell: split the concatenated text into token-bounded chunks --------------
# Doc texts split
from langchain_text_splitters import RecursiveCharacterTextSplitter

chunk_size_tok = 2000
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=chunk_size_tok, chunk_overlap=0
)
# NOTE(review): the tree-building cell in this notebook uses `docs_texts` as its
# leaves, not `texts_split` -- confirm whether the chunks were meant to be used.
texts_split = text_splitter.split_text(concatenated_content)

# --- Cell: embedding model and chat model -------------------------------------
import getpass

from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# Never overwrite a key that is already configured, and never hard-code one:
# prompt only when the environment does not provide it.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OPENAI_API_KEY: ")

embd = OpenAIEmbeddings()

model = ChatOpenAI(temperature=0, model="gpt-4o-2024-05-13")

# Alternative model (requires ANTHROPIC_API_KEY):
# from langchain_anthropic import ChatAnthropic
#
# model = ChatAnthropic(temperature=0, model="claude-3-opus-20240229")
ideas.\n", + "\n", + "**GMM (Gaussian Mixture Model)**\n", + "\n", + "- Model the distribution of data points across different clusters\n", + "- Optimal number of clusters by evaluating the model's Bayesian Information Criterion (BIC)\n", + "\n", + "**UMAP (Uniform Manifold Approximation and Projection)**\n", + "\n", + "- Supports clustering\n", + "- Reduces the dimensionality of high-dimensional data\n", + "- UMAP helps to highlight the natural grouping of data points based on their similarities\n", + "\n", + "**Local and Global Clustering**\n", + "\n", + "- Used to analyze data at different scales\n", + "- Both fine-grained and broader patterns within the data are captured effectively\n", + "\n", + "**Thresholding**\n", + "\n", + "- Apply in the context of GMM to determine cluster membership\n", + "- Based on the probability distribution (assignment of data points to ≥ 1 cluster)\n", + "---\n", + "\n", + "Code for GMM and thresholding is from Sarthi et al., as noted in the below two sources:\n", + "\n", + "* [Original repo](https://github.com/parthsarthi03/raptor/blob/master/raptor/cluster_tree_builder.py)\n", + "* [Minor tweaks](https://github.com/run-llama/llama_index/blob/main/llama-index-packs/llama-index-packs-raptor/llama_index/packs/raptor/clustering.py)\n", + "\n", + "Full credit to both authors." 
RANDOM_SEED = 224  # Fixed seed for reproducibility

### --- Code from citations referenced above (added comments and docstrings) --- ###


def global_cluster_embeddings(
    embeddings: np.ndarray,
    dim: int,
    n_neighbors: Optional[int] = None,
    metric: str = "cosine",
    random_state: Optional[int] = None,
) -> np.ndarray:
    """
    Perform global dimensionality reduction on the embeddings using UMAP.

    Parameters:
    - embeddings: The input embeddings as a numpy array.
    - dim: The target dimensionality for the reduced space.
    - n_neighbors: Optional; the number of neighbors to consider for each point.
                   If not provided, it defaults to the square root of the number of
                   embeddings minus one, but never less than 2.
    - metric: The distance metric to use for UMAP.
    - random_state: Optional seed forwarded to UMAP; None leaves UMAP unseeded.

    Returns:
    - A numpy array of the embeddings reduced to the specified dimensionality.
    """
    if n_neighbors is None:
        # UMAP requires n_neighbors > 1; the square-root heuristic alone gives
        # 0 or 1 when there are four or fewer embeddings.
        n_neighbors = max(2, int((len(embeddings) - 1) ** 0.5))
    return umap.UMAP(
        n_neighbors=n_neighbors,
        n_components=dim,
        metric=metric,
        random_state=random_state,
    ).fit_transform(embeddings)


def local_cluster_embeddings(
    embeddings: np.ndarray,
    dim: int,
    num_neighbors: int = 10,
    metric: str = "cosine",
    random_state: Optional[int] = None,
) -> np.ndarray:
    """
    Perform local dimensionality reduction on the embeddings using UMAP, typically after global clustering.

    Parameters:
    - embeddings: The input embeddings as a numpy array.
    - dim: The target dimensionality for the reduced space.
    - num_neighbors: The number of neighbors to consider for each point.
    - metric: The distance metric to use for UMAP.
    - random_state: Optional seed forwarded to UMAP; None leaves UMAP unseeded.

    Returns:
    - A numpy array of the embeddings reduced to the specified dimensionality.
    """
    return umap.UMAP(
        n_neighbors=num_neighbors,
        n_components=dim,
        metric=metric,
        random_state=random_state,
    ).fit_transform(embeddings)


def get_optimal_clusters(
    embeddings: np.ndarray, max_clusters: int = 50, random_state: int = RANDOM_SEED
) -> int:
    """
    Determine the optimal number of clusters using the Bayesian Information Criterion (BIC) with a Gaussian Mixture Model.

    Candidate counts are 1 .. min(max_clusters, len(embeddings)) - 1; the count with
    the lowest BIC wins.

    Parameters:
    - embeddings: The input embeddings as a numpy array.
    - max_clusters: Exclusive upper bound on the number of clusters to consider.
    - random_state: Seed for reproducibility.

    Returns:
    - An integer representing the optimal number of clusters found. Falls back to 1
      when there is no candidate count to evaluate (e.g. a single embedding).
    """
    max_clusters = min(max_clusters, len(embeddings))
    n_clusters = np.arange(1, max_clusters)
    if n_clusters.size == 0:
        # Nothing to compare: np.argmin on an empty list would raise.
        return 1
    bics = []
    for n in n_clusters:
        gm = GaussianMixture(n_components=n, random_state=random_state)
        gm.fit(embeddings)
        bics.append(gm.bic(embeddings))
    return int(n_clusters[int(np.argmin(bics))])


def GMM_cluster(
    embeddings: np.ndarray, threshold: float, random_state: int = 0
) -> Tuple[List[np.ndarray], int]:
    """
    Cluster embeddings using a Gaussian Mixture Model (GMM) based on a probability threshold.

    Parameters:
    - embeddings: The input embeddings as a numpy array.
    - threshold: The probability threshold for assigning an embedding to a cluster.
    - random_state: Seed for the final mixture fit. The cluster-count search in
                    `get_optimal_clusters` runs with that function's own default seed.

    Returns:
    - A tuple containing the cluster labels and the number of clusters determined.
      The labels hold one array per embedding with every cluster index whose
      membership probability exceeds `threshold`; an array may hold several indices,
      or none if no probability exceeds the threshold.
    """
    n_clusters = get_optimal_clusters(embeddings)
    gm = GaussianMixture(n_components=n_clusters, random_state=random_state)
    gm.fit(embeddings)
    probs = gm.predict_proba(embeddings)
    labels = [np.where(prob > threshold)[0] for prob in probs]
    return labels, n_clusters


def perform_clustering(
    embeddings: np.ndarray,
    dim: int,
    threshold: float,
    random_state: Optional[int] = RANDOM_SEED,
) -> List[np.ndarray]:
    """
    Perform clustering on the embeddings by first reducing their dimensionality globally, then clustering
    using a Gaussian Mixture Model, and finally performing local clustering within each global cluster.

    Parameters:
    - embeddings: The input embeddings as a numpy array.
    - dim: The target dimensionality for UMAP reduction.
    - threshold: The probability threshold for assigning an embedding to a cluster in GMM.
    - random_state: Seed forwarded to both UMAP reductions so repeated runs produce the
                    same layout; pass None to leave UMAP unseeded.

    Returns:
    - A list of numpy arrays, where each array contains the cluster IDs for each embedding.
      Local cluster IDs are offset by the number of clusters already emitted, so IDs are
      unique across global clusters.
    """
    if len(embeddings) <= dim + 1:
        # Avoid clustering when there's insufficient data
        return [np.array([0]) for _ in range(len(embeddings))]

    # Global dimensionality reduction
    reduced_embeddings_global = global_cluster_embeddings(
        embeddings, dim, random_state=random_state
    )
    # Global clustering
    global_clusters, n_global_clusters = GMM_cluster(
        reduced_embeddings_global, threshold
    )

    all_local_clusters = [np.array([]) for _ in range(len(embeddings))]
    total_clusters = 0

    # Iterate through each global cluster to perform local clustering
    for i in range(n_global_clusters):
        # Row positions (within `embeddings`) of the members of global cluster i.
        # Members are tracked by position rather than by comparing embedding values:
        # value matching assigns the same ID repeatedly to duplicate embeddings and
        # needs an (m, n, d) comparison array.
        global_idx = np.flatnonzero([i in gc for gc in global_clusters])

        if global_idx.size == 0:
            continue
        if global_idx.size <= dim + 1:
            # Handle small clusters with direct assignment
            local_clusters = [np.array([0]) for _ in global_idx]
            n_local_clusters = 1
        else:
            # Local dimensionality reduction and clustering
            reduced_embeddings_local = local_cluster_embeddings(
                embeddings[global_idx], dim, random_state=random_state
            )
            local_clusters, n_local_clusters = GMM_cluster(
                reduced_embeddings_local, threshold
            )

        # Assign local cluster IDs, adjusting for total clusters already processed
        for j in range(n_local_clusters):
            member_idx = global_idx[np.flatnonzero([j in lc for lc in local_clusters])]
            for idx in member_idx:
                all_local_clusters[idx] = np.append(
                    all_local_clusters[idx], j + total_clusters
                )

        total_clusters += n_local_clusters

    return all_local_clusters


### --- Our code below --- ###


def embed(texts):
    """
    Generate embeddings for a list of text documents.

    This function assumes the existence of an `embd` object with a method `embed_documents`
    that takes a list of texts and returns their embeddings.

    Parameters:
    - texts: List[str], a list of text documents to be embedded.

    Returns:
    - numpy.ndarray: An array of embeddings for the given text documents.
    """
    return np.array(embd.embed_documents(texts))


def embed_cluster_texts(texts, dim: int = 10, threshold: float = 0.1):
    """
    Embeds a list of texts and clusters them, returning a DataFrame with texts, their embeddings, and cluster labels.

    This function combines embedding generation (`embed`) and clustering
    (`perform_clustering`) into a single step.

    Parameters:
    - texts: List[str], a list of text documents to be processed.
    - dim: Target dimensionality passed to `perform_clustering`.
    - threshold: Membership probability threshold passed to `perform_clustering`.

    Returns:
    - pandas.DataFrame: A DataFrame containing the original texts, their embeddings, and the assigned cluster labels.
    """
    text_embeddings_np = embed(texts)  # Generate embeddings
    cluster_labels = perform_clustering(
        text_embeddings_np, dim, threshold
    )  # Perform clustering on the embeddings
    df = pd.DataFrame()  # Initialize a DataFrame to store the results
    df["text"] = texts  # Store original texts
    df["embd"] = list(text_embeddings_np)  # Store embeddings as a list in the DataFrame
    df["cluster"] = cluster_labels  # Store cluster labels
    return df


def fmt_txt(df: pd.DataFrame) -> str:
    """
    Formats the text documents in a DataFrame into a single string.

    Parameters:
    - df: DataFrame containing the 'text' column with text documents to format.

    Returns:
    - A single string where all text documents are joined by a specific delimiter.
    """
    return "--- --- \n --- --- ".join(df["text"].tolist())


def embed_cluster_summarize_texts(
    texts: List[str], level: int
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """
    Embeds, clusters, and summarizes a list of texts. This function first generates embeddings for the texts,
    clusters them based on similarity, expands the cluster assignments for easier processing, and then summarizes
    the content within each cluster.

    Parameters:
    - texts: A list of text documents to be processed.
    - level: An integer recorded in the `level` column of the summary DataFrame.

    Returns:
    - Tuple containing two DataFrames:
      1. The first DataFrame (`df_clusters`) includes the original texts, their embeddings, and cluster assignments.
      2. The second DataFrame (`df_summary`) contains summaries for each cluster, the given level,
         and the cluster identifiers.
    """

    # Embed and cluster the texts, resulting in a DataFrame with 'text', 'embd', and 'cluster' columns
    df_clusters = embed_cluster_texts(texts)

    # Expand DataFrame entries to document-cluster pairings for straightforward processing
    expanded_list = [
        {"text": text, "embd": vector, "cluster": cluster}
        for text, vector, clusters in zip(
            df_clusters["text"], df_clusters["embd"], df_clusters["cluster"]
        )
        for cluster in clusters
    ]

    # Explicit columns keep the "cluster" lookup below valid even when no text
    # received a cluster assignment.
    expanded_df = pd.DataFrame(expanded_list, columns=["text", "embd", "cluster"])

    # Retrieve unique cluster identifiers for processing
    all_clusters = expanded_df["cluster"].unique()

    print(f"--Generated {len(all_clusters)} clusters--")

    # Summarization
    template = """Here is a sub-set of Generative AI Drug Discovery doc.

    Generative AI Drug Discovery provides a way to improve Drug Discovery.

    Give a detailed summary of the documentation provided.

    Documentation:
    {context}
    """
    prompt = ChatPromptTemplate.from_template(template)
    chain = prompt | model | StrOutputParser()

    # Format text within each cluster for summarization
    summaries = []
    for i in all_clusters:
        df_cluster = expanded_df[expanded_df["cluster"] == i]
        formatted_txt = fmt_txt(df_cluster)
        summaries.append(chain.invoke({"context": formatted_txt}))

    # Create a DataFrame to store summaries with their corresponding cluster and level
    df_summary = pd.DataFrame(
        {
            "summaries": summaries,
            "level": [level] * len(summaries),
            "cluster": list(all_clusters),
        }
    )

    return df_clusters, df_summary


def recursive_embed_cluster_summarize(
    texts: List[str], level: int = 1, n_levels: int = 3
) -> Dict[int, Tuple[pd.DataFrame, pd.DataFrame]]:
    """
    Recursively embeds, clusters, and summarizes texts up to a specified level or until
    the number of unique clusters is no longer greater than 1, storing the results at each level.

    Parameters:
    - texts: List[str], texts to be processed.
    - level: int, current recursion level (starts at 1).
    - n_levels: int, maximum depth of recursion.

    Returns:
    - Dict[int, Tuple[pd.DataFrame, pd.DataFrame]], a dictionary where keys are the recursion
      levels and values are tuples containing the clusters DataFrame and summaries DataFrame at that level.
    """
    results = {}  # Dictionary to store results at each level

    # Perform embedding, clustering, and summarization for the current level
    df_clusters, df_summary = embed_cluster_summarize_texts(texts, level)

    # Store the results of the current level
    results[level] = (df_clusters, df_summary)

    # Determine if further recursion is possible and meaningful
    unique_clusters = df_summary["cluster"].nunique()
    if level < n_levels and unique_clusters > 1:
        # Use summaries as the input texts for the next level of recursion
        new_texts = df_summary["summaries"].tolist()
        next_level_results = recursive_embed_cluster_summarize(
            new_texts, level + 1, n_levels
        )

        # Merge the results from the next level into the current results dictionary
        results.update(next_level_results)

    return results
from langchain_community.vectorstores import Chroma

# Collapsed-tree corpus: start from the raw leaf texts ...
all_texts = [*leaf_texts]

# ... then add every level's cluster summaries, lowest level first.
for level in sorted(results):
    _level_clusters_df, level_summary_df = results[level]
    summaries = level_summary_df["summaries"].tolist()
    all_texts += summaries

# Index leaves and summaries together in a single Chroma vector store
vectorstore = Chroma.from_texts(texts=all_texts, embedding=embd)
retriever = vectorstore.as_retriever()
from langchain import hub
from langchain_core.runnables import RunnablePassthrough

# Prompt
prompt = hub.pull("rlm/rag-prompt")


# Post-processing
def format_docs(docs):
    """Join the page content of the given documents, separated by blank lines."""
    page_texts = [doc.page_content for doc in docs]
    return "\n\n".join(page_texts)


# Chain: retrieved context + the untouched question -> prompt -> model -> plain string
rag_inputs = {"context": retriever | format_docs, "question": RunnablePassthrough()}
rag_chain = rag_inputs | prompt | model | StrOutputParser()

# Question
rag_chain.invoke("How is cancer being addressed in Generative AI Drug Discovery? Give me a specific example.")