[de9ba9]: / notebooks / Biospecimen_RAG_System.ipynb

Download this file

1 lines (1 with data), 48.6 kB

{"cells":[{"cell_type":"code","source":["# Install required packages\n","%pip install openai==1.12.0 azure-kusto-data langchain tenacity langchain-openai\n","\n","# Import libraries\n","from pyspark.sql import SparkSession\n","from notebookutils import mssparkutils\n","from openai import AzureOpenAI\n","import pandas as pd\n","import json\n","from tenacity import retry, wait_random_exponential, stop_after_attempt\n","\n","# Restart the kernel after installation if you get any import errors"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":10,"statement_ids":[5,6,7,8,9,10],"state":"finished","livy_statement_state":"available","session_id":"7206fc12-bc87-410f-b591-2e3863c232ba","normalized_state":"finished","queued_time":"2025-04-02T03:00:37.4725307Z","session_start_time":null,"execution_start_time":"2025-04-02T03:00:37.4746557Z","execution_finish_time":"2025-04-02T03:01:52.0302522Z","parent_msg_id":"da4114a8-ba0e-412b-9425-b7541f92f934"},"text/plain":"StatementMeta(, 7206fc12-bc87-410f-b591-2e3863c232ba, 10, Finished, Available, Finished)"},"metadata":{}},{"output_type":"stream","name":"stdout","text":["Collecting openai==1.12.0\n  Downloading openai-1.12.0-py3-none-any.whl.metadata (18 kB)\nCollecting azure-kusto-data\n  Downloading azure_kusto_data-5.0.2-py2.py3-none-any.whl.metadata (4.2 kB)\nCollecting langchain\n  Downloading langchain-0.3.22-py3-none-any.whl.metadata (7.8 kB)\nRequirement already satisfied: tenacity in /home/trusted-service-user/cluster-env/trident_env/lib/python3.11/site-packages (8.2.3)\nCollecting langchain-openai\n  Downloading langchain_openai-0.3.11-py3-none-any.whl.metadata (2.3 kB)\nRequirement already satisfied: anyio<5,>=3.5.0 in /home/trusted-service-user/cluster-env/trident_env/lib/python3.11/site-packages (from openai==1.12.0) (4.2.0)\nRequirement already satisfied: distro<2,>=1.7.0 in 
/home/trusted-service-user/cluster-env/trident_env/lib/python3.11/site-packages (from openai==1.12.0) (1.8.0)\nCollecting httpx<1,>=0.23.0 (from openai==1.12.0)\n  Downloading httpx-0.28.1-py3-none-any.whl.metadata (7.1 kB)\nCollecting pydantic<3,>=1.9.0 (from openai==1.12.0)\n  Downloading pydantic-2.11.1-py3-none-any.whl.metadata (63 kB)\n\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m63.5/63.5 kB\u001b[0m \u001b[31m316.7 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n\u001b[?25hRequirement already satisfied: sniffio in /home/trusted-service-user/cluster-env/trident_env/lib/python3.11/site-packages (from openai==1.12.0) (1.3.0)\nRequirement already satisfied: tqdm>4 in /home/trusted-service-user/cluster-env/trident_env/lib/python3.11/site-packages (from openai==1.12.0) (4.65.0)\nRequirement already satisfied: typing-extensions<5,>=4.7 in /home/trusted-service-user/cluster-env/trident_env/lib/python3.11/site-packages (from openai==1.12.0) (4.9.0)\nRequirement already satisfied: python-dateutil>=2.8.0 in /home/trusted-service-user/cluster-env/trident_env/lib/python3.11/site-packages (from azure-kusto-data) (2.8.2)\nCollecting requests>=2.32.3 (from azure-kusto-data)\n  Downloading requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)\nCollecting azure-identity<2,>=1.21.0 (from azure-kusto-data)\n  Downloading azure_identity-1.21.0-py3-none-any.whl.metadata (81 kB)\n\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m81.3/81.3 kB\u001b[0m \u001b[31m1.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n\u001b[?25hRequirement already satisfied: msal<2,>=1.9.0 in /home/trusted-service-user/cluster-env/trident_env/lib/python3.11/site-packages (from azure-kusto-data) (1.25.0)\nCollecting ijson~=3.1 (from azure-kusto-data)\n  Downloading ijson-3.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (21 kB)\nRequirement already satisfied: 
azure-core<2,>=1.11.0 in /home/trusted-service-user/cluster-env/trident_env/lib/python3.11/site-packages (from azure-kusto-data) (1.30.2)\nCollecting langchain-core<1.0.0,>=0.3.49 (from langchain)\n  Downloading langchain_core-0.3.49-py3-none-any.whl.metadata (5.9 kB)\nCollecting langchain-text-splitters<1.0.0,>=0.3.7 (from langchain)\n  Downloading langchain_text_splitters-0.3.7-py3-none-any.whl.metadata (1.9 kB)\nCollecting langsmith<0.4,>=0.1.17 (from langchain)\n  Downloading langsmith-0.3.22-py3-none-any.whl.metadata (15 kB)\nRequirement already satisfied: SQLAlchemy<3,>=1.4 in /home/trusted-service-user/cluster-env/trident_env/lib/python3.11/site-packages (from langchain) (2.0.25)\nRequirement already satisfied: PyYAML>=5.3 in /home/trusted-service-user/cluster-env/trident_env/lib/python3.11/site-packages (from langchain) (6.0.1)\nINFO: pip is looking at multiple versions of langchain-openai to determine which version is compatible with other requirements. This could take a while.\nCollecting langchain-openai\n  Downloading langchain_openai-0.3.10-py3-none-any.whl.metadata (2.3 kB)\n  Downloading langchain_openai-0.3.9-py3-none-any.whl.metadata (2.3 kB)\n  Downloading langchain_openai-0.3.8-py3-none-any.whl.metadata (2.3 kB)\n  Downloading langchain_openai-0.3.7-py3-none-any.whl.metadata (2.3 kB)\n  Downloading langchain_openai-0.3.6-py3-none-any.whl.metadata (2.3 kB)\n  Downloading langchain_openai-0.3.5-py3-none-any.whl.metadata (2.3 kB)\n  Downloading langchain_openai-0.3.4-py3-none-any.whl.metadata (2.3 kB)\nINFO: pip is still looking at multiple versions of langchain-openai to determine which version is compatible with other requirements. 
This could take a while.\n  Downloading langchain_openai-0.3.3-py3-none-any.whl.metadata (2.7 kB)\n  Downloading langchain_openai-0.3.2-py3-none-any.whl.metadata (2.7 kB)\n  Downloading langchain_openai-0.3.1-py3-none-any.whl.metadata (2.7 kB)\n  Downloading langchain_openai-0.3.0-py3-none-any.whl.metadata (2.7 kB)\n  Downloading langchain_openai-0.2.14-py3-none-any.whl.metadata (2.7 kB)\nINFO: This is taking longer than usual. You might need to provide the dependency resolver with stricter constraints to reduce runtime. See https://pip.pypa.io/warnings/backtracking for guidance. If you want to abort this run, press Ctrl + C.\n  Downloading langchain_openai-0.2.13-py3-none-any.whl.metadata (2.7 kB)\n  Downloading langchain_openai-0.2.12-py3-none-any.whl.metadata (2.7 kB)\n  Downloading langchain_openai-0.2.11-py3-none-any.whl.metadata (2.7 kB)\n  Downloading langchain_openai-0.2.10-py3-none-any.whl.metadata (2.6 kB)\n  Downloading langchain_openai-0.2.9-py3-none-any.whl.metadata (2.6 kB)\n  Downloading langchain_openai-0.2.8-py3-none-any.whl.metadata (2.6 kB)\n  Downloading langchain_openai-0.2.7-py3-none-any.whl.metadata (2.6 kB)\n  Downloading langchain_openai-0.2.6-py3-none-any.whl.metadata (2.6 kB)\n  Downloading langchain_openai-0.2.5-py3-none-any.whl.metadata (2.6 kB)\n  Downloading langchain_openai-0.2.4-py3-none-any.whl.metadata (2.6 kB)\n  Downloading langchain_openai-0.2.3-py3-none-any.whl.metadata (2.6 kB)\n  Downloading langchain_openai-0.2.2-py3-none-any.whl.metadata (2.6 kB)\n  Downloading langchain_openai-0.2.1-py3-none-any.whl.metadata (2.6 kB)\n  Downloading langchain_openai-0.2.0-py3-none-any.whl.metadata (2.6 kB)\n  Downloading langchain_openai-0.1.25-py3-none-any.whl.metadata (2.6 kB)\n  Downloading langchain_openai-0.1.24-py3-none-any.whl.metadata (2.6 kB)\n  Downloading langchain_openai-0.1.23-py3-none-any.whl.metadata (2.6 kB)\n  Downloading langchain_openai-0.1.22-py3-none-any.whl.metadata (2.6 kB)\n  Downloading 
langchain_openai-0.1.20-py3-none-any.whl.metadata (2.6 kB)\n  Downloading langchain_openai-0.1.19-py3-none-any.whl.metadata (2.6 kB)\n  Downloading langchain_openai-0.1.17-py3-none-any.whl.metadata (2.5 kB)\n  Downloading langchain_openai-0.1.16-py3-none-any.whl.metadata (2.5 kB)\n  Downloading langchain_openai-0.1.15-py3-none-any.whl.metadata (2.5 kB)\n  Downloading langchain_openai-0.1.14-py3-none-any.whl.metadata (2.5 kB)\n  Downloading langchain_openai-0.1.13-py3-none-any.whl.metadata (2.5 kB)\n  Downloading langchain_openai-0.1.12-py3-none-any.whl.metadata (2.5 kB)\n  Downloading langchain_openai-0.1.11-py3-none-any.whl.metadata (2.5 kB)\n  Downloading langchain_openai-0.1.10-py3-none-any.whl.metadata (2.5 kB)\n  Downloading langchain_openai-0.1.9-py3-none-any.whl.metadata (2.5 kB)\n  Downloading langchain_openai-0.1.8-py3-none-any.whl.metadata (2.5 kB)\n  Downloading langchain_openai-0.1.7-py3-none-any.whl.metadata (2.5 kB)\n  Downloading langchain_openai-0.1.6-py3-none-any.whl.metadata (2.5 kB)\n  Downloading langchain_openai-0.1.5-py3-none-any.whl.metadata (2.5 kB)\n  Downloading langchain_openai-0.1.4-py3-none-any.whl.metadata (2.5 kB)\n  Downloading langchain_openai-0.1.3-py3-none-any.whl.metadata (2.5 kB)\n  Downloading langchain_openai-0.1.2-py3-none-any.whl.metadata (2.5 kB)\n  Downloading langchain_openai-0.1.1-py3-none-any.whl.metadata (2.5 kB)\n  Downloading langchain_openai-0.0.8-py3-none-any.whl.metadata (2.5 kB)\n  Downloading langchain_openai-0.0.7-py3-none-any.whl.metadata (2.5 kB)\n  Downloading langchain_openai-0.0.6-py3-none-any.whl.metadata (2.5 kB)\n  Downloading langchain_openai-0.0.5-py3-none-any.whl.metadata (2.5 kB)\n  Downloading langchain_openai-0.0.4-py3-none-any.whl.metadata (2.5 kB)\n  Downloading langchain_openai-0.0.3-py3-none-any.whl.metadata (2.5 kB)\n  Downloading langchain_openai-0.0.2.post1-py3-none-any.whl.metadata (2.4 kB)\n  Downloading langchain_openai-0.0.2-py3-none-any.whl.metadata (570 bytes)\nCollecting langchain\n  
Downloading langchain-0.3.21-py3-none-any.whl.metadata (7.8 kB)\n  Downloading langchain-0.3.20-py3-none-any.whl.metadata (7.7 kB)\n  Downloading langchain-0.3.19-py3-none-any.whl.metadata (7.9 kB)\nRequirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /home/trusted-service-user/cluster-env/trident_env/lib/python3.11/site-packages (from langchain) (3.9.3)\nRequirement already satisfied: numpy<2,>=1.26.4 in /home/trusted-service-user/cluster-env/trident_env/lib/python3.11/site-packages (from langchain) (1.26.4)\n  Downloading langchain-0.3.18-py3-none-any.whl.metadata (7.8 kB)\n  Downloading langchain-0.3.17-py3-none-any.whl.metadata (7.1 kB)\n  Downloading langchain-0.3.16-py3-none-any.whl.metadata (7.1 kB)\n  Downloading langchain-0.3.15-py3-none-any.whl.metadata (7.1 kB)\n  Downloading langchain-0.3.14-py3-none-any.whl.metadata (7.1 kB)\nCollecting langsmith<0.3,>=0.1.17 (from langchain)\n  Downloading langsmith-0.2.11-py3-none-any.whl.metadata (14 kB)\nCollecting langchain\n  Downloading langchain-0.3.13-py3-none-any.whl.metadata (7.1 kB)\n  Downloading langchain-0.3.12-py3-none-any.whl.metadata (7.1 kB)\n  Downloading langchain-0.3.11-py3-none-any.whl.metadata (7.1 kB)\n  Downloading langchain-0.3.10-py3-none-any.whl.metadata (7.1 kB)\nCollecting langsmith<0.2.0,>=0.1.17 (from langchain)\n  Downloading langsmith-0.1.147-py3-none-any.whl.metadata (14 kB)\nCollecting langchain\n  Downloading langchain-0.3.9-py3-none-any.whl.metadata (7.1 kB)\n  Downloading langchain-0.3.8-py3-none-any.whl.metadata (7.1 kB)\n  Downloading langchain-0.3.7-py3-none-any.whl.metadata (7.1 kB)\n  Downloading langchain-0.3.6-py3-none-any.whl.metadata (7.1 kB)\n  Downloading langchain-0.3.5-py3-none-any.whl.metadata (7.1 kB)\n  Downloading langchain-0.3.4-py3-none-any.whl.metadata (7.1 kB)\n  Downloading langchain-0.3.3-py3-none-any.whl.metadata (7.1 kB)\n  Downloading langchain-0.3.2-py3-none-any.whl.metadata (7.1 kB)\n  Downloading langchain-0.3.1-py3-none-any.whl.metadata (7.1 kB)\n 
 Downloading langchain-0.3.0-py3-none-any.whl.metadata (7.1 kB)\n  Downloading langchain-0.2.17-py3-none-any.whl.metadata (7.1 kB)\nCollecting langchain-core<0.3.0,>=0.2.43 (from langchain)\n  Downloading langchain_core-0.2.43-py3-none-any.whl.metadata (6.2 kB)\nCollecting langchain-text-splitters<0.3.0,>=0.2.0 (from langchain)\n  Downloading langchain_text_splitters-0.2.4-py3-none-any.whl.metadata (2.3 kB)\nCollecting langchain\n  Downloading langchain-0.2.16-py3-none-any.whl.metadata (7.1 kB)\n  Downloading langchain-0.2.15-py3-none-any.whl.metadata (7.1 kB)\n  Downloading langchain-0.2.14-py3-none-any.whl.metadata (7.1 kB)\n  Downloading langchain-0.2.13-py3-none-any.whl.metadata (7.1 kB)\n  Downloading langchain-0.2.12-py3-none-any.whl.metadata (7.1 kB)\n  Downloading langchain-0.2.11-py3-none-any.whl.metadata (7.1 kB)\n  Downloading langchain-0.2.10-py3-none-any.whl.metadata (6.9 kB)\n  Downloading langchain-0.2.9-py3-none-any.whl.metadata (6.9 kB)\n  Downloading langchain-0.2.8-py3-none-any.whl.metadata (6.9 kB)\n  Downloading langchain-0.2.7-py3-none-any.whl.metadata (6.9 kB)\n  Downloading langchain-0.2.6-py3-none-any.whl.metadata (7.0 kB)\n  Downloading langchain-0.2.5-py3-none-any.whl.metadata (7.0 kB)\n  Downloading langchain-0.2.4-py3-none-any.whl.metadata (7.0 kB)\n  Downloading langchain-0.2.3-py3-none-any.whl.metadata (6.9 kB)\n  Downloading langchain-0.2.2-py3-none-any.whl.metadata (13 kB)\n  Downloading langchain-0.2.1-py3-none-any.whl.metadata (13 kB)\n  Downloading langchain-0.2.0-py3-none-any.whl.metadata (13 kB)\nCollecting dataclasses-json<0.7,>=0.5.7 (from langchain)\n  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)\nCollecting langchain\n  Downloading langchain-0.1.20-py3-none-any.whl.metadata (13 kB)\nCollecting langchain-community<0.1,>=0.0.38 (from langchain)\n  Downloading langchain_community-0.0.38-py3-none-any.whl.metadata (8.7 kB)\nCollecting langchain-core<0.2.0,>=0.1.52 (from langchain)\n  Downloading 
langchain_core-0.1.53-py3-none-any.whl.metadata (5.9 kB)\nCollecting langchain-text-splitters<0.1,>=0.0.1 (from langchain)\n  Downloading langchain_text_splitters-0.0.2-py3-none-any.whl.metadata (2.2 kB)\nCollecting tiktoken<1,>=0.5.2 (from langchain-openai)\n  Downloading tiktoken-0.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)\nCollecting jsonpatch<2.0,>=1.33 (from langchain-core<0.2.0,>=0.1.52->langchain)\n  Downloading jsonpatch-1.33-py2.py3-none-any.whl.metadata (3.0 kB)\nCollecting packaging<24.0,>=23.2 (from langchain-core<0.2.0,>=0.1.52->langchain)\n  Downloading packaging-23.2-py3-none-any.whl.metadata (3.2 kB)\nRequirement already satisfied: aiosignal>=1.1.2 in /home/trusted-service-user/cluster-env/trident_env/lib/python3.11/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.2.0)\nRequirement already satisfied: attrs>=17.3.0 in /home/trusted-service-user/cluster-env/trident_env/lib/python3.11/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (23.1.0)\nRequirement already satisfied: frozenlist>=1.1.1 in /home/trusted-service-user/cluster-env/trident_env/lib/python3.11/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.4.0)\nRequirement already satisfied: multidict<7.0,>=4.5 in /home/trusted-service-user/cluster-env/trident_env/lib/python3.11/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (6.0.4)\nRequirement already satisfied: yarl<2.0,>=1.0 in /home/trusted-service-user/cluster-env/trident_env/lib/python3.11/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.9.3)\nRequirement already satisfied: idna>=2.8 in /home/trusted-service-user/cluster-env/trident_env/lib/python3.11/site-packages (from anyio<5,>=3.5.0->openai==1.12.0) (3.4)\nRequirement already satisfied: six>=1.11.0 in /home/trusted-service-user/cluster-env/trident_env/lib/python3.11/site-packages (from azure-core<2,>=1.11.0->azure-kusto-data) (1.16.0)\nCollecting azure-core<2,>=1.11.0 (from azure-kusto-data)\n  Downloading 
azure_core-1.32.0-py3-none-any.whl.metadata (39 kB)\nRequirement already satisfied: cryptography>=2.5 in /home/trusted-service-user/cluster-env/trident_env/lib/python3.11/site-packages (from azure-identity<2,>=1.21.0->azure-kusto-data) (42.0.2)\nCollecting msal<2,>=1.9.0 (from azure-kusto-data)\n  Downloading msal-1.32.0-py3-none-any.whl.metadata (11 kB)\nCollecting msal-extensions>=1.2.0 (from azure-identity<2,>=1.21.0->azure-kusto-data)\n  Downloading msal_extensions-1.3.1-py3-none-any.whl.metadata (7.8 kB)\nCollecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain)\n  Downloading marshmallow-3.26.1-py3-none-any.whl.metadata (7.3 kB)\nCollecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain)\n  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)\nRequirement already satisfied: certifi in /home/trusted-service-user/cluster-env/trident_env/lib/python3.11/site-packages (from httpx<1,>=0.23.0->openai==1.12.0) (2024.2.2)\nCollecting httpcore==1.* (from httpx<1,>=0.23.0->openai==1.12.0)\n  Downloading httpcore-1.0.7-py3-none-any.whl.metadata (21 kB)\nCollecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<1,>=0.23.0->openai==1.12.0)\n  Downloading h11-0.14.0-py3-none-any.whl.metadata (8.2 kB)\nCollecting orjson<4.0.0,>=3.9.14 (from langsmith<0.2.0,>=0.1.17->langchain)\n  Downloading orjson-3.10.16-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (41 kB)\n\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m41.8/41.8 kB\u001b[0m \u001b[31m2.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hCollecting requests-toolbelt<2.0.0,>=1.0.0 (from langsmith<0.2.0,>=0.1.17->langchain)\n  Downloading requests_toolbelt-1.0.0-py2.py3-none-any.whl.metadata (14 kB)\nRequirement already satisfied: PyJWT<3,>=1.0.0 in /home/trusted-service-user/cluster-env/trident_env/lib/python3.11/site-packages (from PyJWT[crypto]<3,>=1.0.0->msal<2,>=1.9.0->azure-kusto-data) 
(2.4.0)\nCollecting annotated-types>=0.6.0 (from pydantic<3,>=1.9.0->openai==1.12.0)\n  Downloading annotated_types-0.7.0-py3-none-any.whl.metadata (15 kB)\nCollecting pydantic-core==2.33.0 (from pydantic<3,>=1.9.0->openai==1.12.0)\n  Downloading pydantic_core-2.33.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.8 kB)\nCollecting typing-extensions<5,>=4.7 (from openai==1.12.0)\n  Downloading typing_extensions-4.13.0-py3-none-any.whl.metadata (3.0 kB)\nCollecting typing-inspection>=0.4.0 (from pydantic<3,>=1.9.0->openai==1.12.0)\n  Downloading typing_inspection-0.4.0-py3-none-any.whl.metadata (2.6 kB)\nRequirement already satisfied: charset-normalizer<4,>=2 in /home/trusted-service-user/cluster-env/trident_env/lib/python3.11/site-packages (from requests>=2.32.3->azure-kusto-data) (2.0.4)\nRequirement already satisfied: urllib3<3,>=1.21.1 in /home/trusted-service-user/cluster-env/trident_env/lib/python3.11/site-packages (from requests>=2.32.3->azure-kusto-data) (2.1.0)\nRequirement already satisfied: greenlet!=0.4.17 in /home/trusted-service-user/cluster-env/trident_env/lib/python3.11/site-packages (from SQLAlchemy<3,>=1.4->langchain) (3.0.1)\nRequirement already satisfied: regex>=2022.1.18 in /home/trusted-service-user/cluster-env/trident_env/lib/python3.11/site-packages (from tiktoken<1,>=0.5.2->langchain-openai) (2023.10.3)\nRequirement already satisfied: cffi>=1.12 in /home/trusted-service-user/cluster-env/trident_env/lib/python3.11/site-packages (from cryptography>=2.5->azure-identity<2,>=1.21.0->azure-kusto-data) (1.16.0)\nRequirement already satisfied: jsonpointer>=1.9 in /home/trusted-service-user/cluster-env/trident_env/lib/python3.11/site-packages (from jsonpatch<2.0,>=1.33->langchain-core<0.2.0,>=0.1.52->langchain) (2.1)\nRequirement already satisfied: mypy-extensions>=0.3.0 in /home/trusted-service-user/cluster-env/trident_env/lib/python3.11/site-packages (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain) 
(1.0.0)\nRequirement already satisfied: pycparser in /home/trusted-service-user/cluster-env/trident_env/lib/python3.11/site-packages (from cffi>=1.12->cryptography>=2.5->azure-identity<2,>=1.21.0->azure-kusto-data) (2.21)\nDownloading openai-1.12.0-py3-none-any.whl (226 kB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m226.7/226.7 kB\u001b[0m \u001b[31m1.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n\u001b[?25hDownloading azure_kusto_data-5.0.2-py2.py3-none-any.whl (52 kB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m52.2/52.2 kB\u001b[0m \u001b[31m5.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hDownloading langchain-0.1.20-py3-none-any.whl (1.0 MB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.0/1.0 MB\u001b[0m \u001b[31m4.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n\u001b[?25hDownloading langchain_openai-0.1.5-py3-none-any.whl (34 kB)\nDownloading langchain_core-0.1.53-py3-none-any.whl (303 kB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m303.1/303.1 kB\u001b[0m \u001b[31m15.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hDownloading azure_identity-1.21.0-py3-none-any.whl (189 kB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m189.2/189.2 kB\u001b[0m \u001b[31m17.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hDownloading azure_core-1.32.0-py3-none-any.whl (198 kB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m198.9/198.9 kB\u001b[0m \u001b[31m16.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hDownloading dataclasses_json-0.6.7-py3-none-any.whl (28 kB)\nDownloading httpx-0.28.1-py3-none-any.whl (73 kB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m73.5/73.5 kB\u001b[0m \u001b[31m16.4 MB/s\u001b[0m 
eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hDownloading httpcore-1.0.7-py3-none-any.whl (78 kB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m78.6/78.6 kB\u001b[0m \u001b[31m15.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hDownloading ijson-3.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (119 kB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m119.2/119.2 kB\u001b[0m \u001b[31m16.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hDownloading langchain_community-0.0.38-py3-none-any.whl (2.0 MB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m10.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m \u001b[36m0:00:01\u001b[0mm\n\u001b[?25hDownloading langchain_text_splitters-0.0.2-py3-none-any.whl (23 kB)\nDownloading langsmith-0.1.147-py3-none-any.whl (311 kB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m311.8/311.8 kB\u001b[0m \u001b[31m40.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hDownloading msal-1.32.0-py3-none-any.whl (114 kB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m114.7/114.7 kB\u001b[0m \u001b[31m34.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hDownloading pydantic-2.11.1-py3-none-any.whl (442 kB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m442.6/442.6 kB\u001b[0m \u001b[31m40.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hDownloading pydantic_core-2.33.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.0 MB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m39.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n\u001b[?25hDownloading requests-2.32.3-py3-none-any.whl (64 kB)\n\u001b[2K   
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m64.9/64.9 kB\u001b[0m \u001b[31m20.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hDownloading tiktoken-0.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m45.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hDownloading typing_extensions-4.13.0-py3-none-any.whl (45 kB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.7/45.7 kB\u001b[0m \u001b[31m16.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hDownloading annotated_types-0.7.0-py3-none-any.whl (13 kB)\nDownloading jsonpatch-1.33-py2.py3-none-any.whl (12 kB)\nDownloading marshmallow-3.26.1-py3-none-any.whl (50 kB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m50.9/50.9 kB\u001b[0m \u001b[31m17.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hDownloading msal_extensions-1.3.1-py3-none-any.whl (20 kB)\nDownloading orjson-3.10.16-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (132 kB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m132.8/132.8 kB\u001b[0m \u001b[31m45.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hDownloading packaging-23.2-py3-none-any.whl (53 kB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.0/53.0 kB\u001b[0m \u001b[31m18.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hDownloading requests_toolbelt-1.0.0-py2.py3-none-any.whl (54 kB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m54.5/54.5 kB\u001b[0m \u001b[31m20.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hDownloading typing_inspect-0.9.0-py3-none-any.whl (8.8 kB)\nDownloading typing_inspection-0.4.0-py3-none-any.whl (14 kB)\nDownloading h11-0.14.0-py3-none-any.whl (58 
kB)\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m21.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hInstalling collected packages: ijson, typing-extensions, requests, packaging, orjson, jsonpatch, h11, annotated-types, typing-inspection, typing-inspect, tiktoken, requests-toolbelt, pydantic-core, marshmallow, httpcore, azure-core, pydantic, httpx, dataclasses-json, openai, msal, langsmith, msal-extensions, langchain-core, langchain-text-splitters, langchain-openai, langchain-community, azure-identity, langchain, azure-kusto-data\n  Attempting uninstall: typing-extensions\n    Found existing installation: typing_extensions 4.9.0\n    Not uninstalling typing-extensions at /home/trusted-service-user/cluster-env/trident_env/lib/python3.11/site-packages, outside environment /nfs4/pyenv-8227ab4e-8921-40c4-a454-e5562abdce69\n    Can't uninstall 'typing_extensions'. No files were found to uninstall.\n  Attempting uninstall: requests\n    Found existing installation: requests 2.31.0\n    Not uninstalling requests at /home/trusted-service-user/cluster-env/trident_env/lib/python3.11/site-packages, outside environment /nfs4/pyenv-8227ab4e-8921-40c4-a454-e5562abdce69\n    Can't uninstall 'requests'. No files were found to uninstall.\n  Attempting uninstall: packaging\n    Found existing installation: packaging 23.1\n    Not uninstalling packaging at /home/trusted-service-user/cluster-env/trident_env/lib/python3.11/site-packages, outside environment /nfs4/pyenv-8227ab4e-8921-40c4-a454-e5562abdce69\n    Can't uninstall 'packaging'. No files were found to uninstall.\n  Attempting uninstall: jsonpatch\n    Found existing installation: jsonpatch 1.32\n    Not uninstalling jsonpatch at /home/trusted-service-user/cluster-env/trident_env/lib/python3.11/site-packages, outside environment /nfs4/pyenv-8227ab4e-8921-40c4-a454-e5562abdce69\n    Can't uninstall 'jsonpatch'. 
No files were found to uninstall.\n  Attempting uninstall: azure-core\n    Found existing installation: azure-core 1.30.2\n    Not uninstalling azure-core at /home/trusted-service-user/cluster-env/trident_env/lib/python3.11/site-packages, outside environment /nfs4/pyenv-8227ab4e-8921-40c4-a454-e5562abdce69\n    Can't uninstall 'azure-core'. No files were found to uninstall.\n  Attempting uninstall: msal\n    Found existing installation: msal 1.25.0\n    Not uninstalling msal at /home/trusted-service-user/cluster-env/trident_env/lib/python3.11/site-packages, outside environment /nfs4/pyenv-8227ab4e-8921-40c4-a454-e5562abdce69\n    Can't uninstall 'msal'. No files were found to uninstall.\n  Attempting uninstall: msal-extensions\n    Found existing installation: msal-extensions 1.0.0\n    Not uninstalling msal-extensions at /home/trusted-service-user/cluster-env/trident_env/lib/python3.11/site-packages, outside environment /nfs4/pyenv-8227ab4e-8921-40c4-a454-e5562abdce69\n    Can't uninstall 'msal-extensions'. No files were found to uninstall.\n  Attempting uninstall: azure-identity\n    Found existing installation: azure-identity 1.15.0\n    Not uninstalling azure-identity at /home/trusted-service-user/cluster-env/trident_env/lib/python3.11/site-packages, outside environment /nfs4/pyenv-8227ab4e-8921-40c4-a454-e5562abdce69\n    Can't uninstall 'azure-identity'. 
No files were found to uninstall.\nSuccessfully installed annotated-types-0.7.0 azure-core-1.32.0 azure-identity-1.21.0 azure-kusto-data-5.0.2 dataclasses-json-0.6.7 h11-0.14.0 httpcore-1.0.7 httpx-0.28.1 ijson-3.3.0 jsonpatch-1.33 langchain-0.1.20 langchain-community-0.0.38 langchain-core-0.1.53 langchain-openai-0.1.5 langchain-text-splitters-0.0.2 langsmith-0.1.147 marshmallow-3.26.1 msal-1.32.0 msal-extensions-1.3.1 openai-1.12.0 orjson-3.10.16 packaging-23.2 pydantic-2.11.1 pydantic-core-2.33.0 requests-2.32.3 requests-toolbelt-1.0.0 tiktoken-0.9.0 typing-extensions-4.13.0 typing-inspect-0.9.0 typing-inspection-0.4.0\n\n\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m25.0.1\u001b[0m\n\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython -m pip install --upgrade pip\u001b[0m\nNote: you may need to restart the kernel to use updated packages.\nWarning: PySpark kernel has been restarted to use updated packages.\n\n"]}],"execution_count":3,"metadata":{"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"03c6b024-50aa-49be-8499-08e522077c19"},{"cell_type":"code","source":[
"# Configuration\n",
"OPENAI_GPT4_DEPLOYMENT_NAME = \"gpt-4\"\n",
"# NOTE(review): getSecret is documented as taking the Key Vault endpoint as its first\n",
"# argument -- confirm that \"openai-scope\" resolves to the intended vault.\n",
"OPENAI_DEPLOYMENT_ENDPOINT = mssparkutils.credentials.getSecret(\"openai-scope\", \"OPENAI_ENDPOINT\")\n",
"OPENAI_API_KEY = mssparkutils.credentials.getSecret(\"openai-scope\", \"OPENAI_KEY\")\n",
"OPENAI_ADA_EMBEDDING_DEPLOYMENT_NAME = \"text-embedding-ada-002\"\n",
"\n",
"KUSTO_URI = \"https://trd-zdxwqrcu1znbqygpxg.z2.kusto.fabric.microsoft.com\"\n",
"KUSTO_DATABASE = \"BioEventHouse\"\n",
"KUSTO_TABLE = \"biospecimen_embeddings\"\n",
"accessToken = mssparkutils.credentials.getToken(KUSTO_URI)\n",
"\n",
"# Spark resolves relative paths against the default lakehouse; the\n",
"# /lakehouse/default/... form is the local File API mount meant for pandas/os.\n",
"BIOSPECIMEN_CSV_PATH = \"Files/biospecimen_data.csv\"\n",
"# Columns that prepare_data() reads from the CSV.\n",
"REQUIRED_COLUMNS = (\"Aliquot ID\", \"Case Submitter ID\", \"Sample Type\", \"Primary Site\")\n",
"\n",
"client = AzureOpenAI(\n",
"    azure_endpoint=OPENAI_DEPLOYMENT_ENDPOINT,\n",
"    api_key=OPENAI_API_KEY,\n",
"    api_version=\"2023-09-01-preview\"\n",
")\n",
"\n",
"@retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))\n",
"def generate_embeddings(text):\n",
"    \"\"\"Return the embedding vector for text from the ada embedding deployment.\n",
"\n",
"    Newlines are replaced by spaces before the request. The call is retried with\n",
"    random exponential backoff (1-20 s) and gives up after 6 attempts.\n",
"    \"\"\"\n",
"    txt = text.replace(\"\\n\", \" \")\n",
"    return client.embeddings.create(input=[txt], model=OPENAI_ADA_EMBEDDING_DEPLOYMENT_NAME).data[0].embedding\n",
"\n",
"# Data Preparation\n",
"def _create_document_text(row):\n",
"    \"\"\"Build the text that is embedded for one biospecimen row.\n",
"\n",
"    Only fields that are present (not null/NaN) contribute a 'Label: value' part;\n",
"    a row with none of them yields an empty string.\n",
"    \"\"\"\n",
"    text_parts = []\n",
"    if pd.notna(row['Sample Type']):\n",
"        text_parts.append(f\"Sample Type: {row['Sample Type']}\")\n",
"    if pd.notna(row['Primary Site']):\n",
"        text_parts.append(f\"Primary Site: {row['Primary Site']}\")\n",
"    # TODO: append the remaining descriptive columns here so that records differ by\n",
"    # more than sample type and primary site.\n",
"    return \". \".join(text_parts)\n",
"\n",
"def prepare_data(csv_path=BIOSPECIMEN_CSV_PATH):\n",
"    \"\"\"Load the biospecimen CSV, embed every record and return a Spark DataFrame.\n",
"\n",
"    Args:\n",
"        csv_path: Spark-readable path of the CSV file (a header row is expected).\n",
"\n",
"    Returns:\n",
"        Spark DataFrame with the columns document_id, content, metadata, embedding.\n",
"\n",
"    Raises:\n",
"        KeyError: if the CSV lacks one of REQUIRED_COLUMNS.\n",
"        ValueError: if no row yields any text to embed.\n",
"    \"\"\"\n",
"    # Read from Lakehouse\n",
"    df = spark.read.format(\"csv\").option(\"header\", \"true\").load(csv_path)\n",
"\n",
"    # Convert to pandas for easier processing\n",
"    pdf = df.toPandas()\n",
"\n",
"    missing = [column for column in REQUIRED_COLUMNS if column not in pdf.columns]\n",
"    if missing:\n",
"        raise KeyError(f\"{csv_path} is missing required columns: {missing}\")\n",
"    if pdf.empty:\n",
"        raise ValueError(f\"{csv_path} contains no rows\")\n",
"\n",
"    pdf['document_text'] = pdf.apply(_create_document_text, axis=1)\n",
"\n",
"    # Rows without any usable field produce an empty text, which the embeddings\n",
"    # endpoint rejects, so they are left out instead of failing the whole load.\n",
"    pdf = pdf[pdf['document_text'].str.len() > 0].reset_index(drop=True)\n",
"    if pdf.empty:\n",
"        raise ValueError(f\"{csv_path} contains no rows with text to embed\")\n",
"\n",
"    # Generate embeddings: many records share the same text, so request each\n",
"    # distinct text once and reuse the vector.\n",
"    embedding_by_text = {text: generate_embeddings(text) for text in pdf['document_text'].unique()}\n",
"    pdf['embedding'] = pdf['document_text'].apply(lambda text: embedding_by_text[text])\n",
"\n",
"    # Prepare for Kusto\n",
"    embeddings_df = pd.DataFrame({\n",
"        'document_id': pdf['Aliquot ID'],\n",
"        'content': pdf['document_text'],\n",
"        'metadata': pdf.apply(lambda x: {\n",
"            'sample_type': x['Sample Type'],\n",
"            'primary_site': x['Primary Site'],\n",
"            'case_id': x['Case Submitter ID']\n",
"        }, axis=1),\n",
"        'embedding': pdf['embedding']\n",
"    })\n",
"\n",
"    return spark.createDataFrame(embeddings_df)\n",
"\n",
"# Store in Eventhouse\n",
"def store_embeddings(embeddings_df):\n",
"    \"\"\"Append embeddings_df to KUSTO_TABLE through the Kusto Spark connector.\"\"\"\n",
"    (\n",
"        embeddings_df.write\n",
"        .format(\"com.microsoft.kusto.spark.synapse.datasource\")\n",
"        .option(\"kustoCluster\", KUSTO_URI)\n",
"        .option(\"kustoDatabase\", KUSTO_DATABASE)\n",
"        .option(\"kustoTable\", KUSTO_TABLE)\n",
"        .option(\"accessToken\", accessToken)\n",
"        .mode(\"Append\")\n",
"        .save()\n",
"    )\n",
"\n",
"# Query System\n",
"def query_biospecimen_data(question, nr_of_answers=3):\n",
"    \"\"\"Answer question from the stored records that are most similar to it.\n",
"\n",
"    Args:\n",
"        question: Natural-language question; it is embedded and compared with the\n",
"            stored embeddings by cosine similarity.\n",
"        nr_of_answers: Number of records to retrieve (positive integer).\n",
"\n",
"    Returns:\n",
"        dict with 'answer' (the model reply) and 'sources' (list of row dicts holding\n",
"        content, metadata and similarity). When nothing is retrieved the model is\n",
"        not called and 'sources' is empty.\n",
"\n",
"    Raises:\n",
"        ValueError: if question is blank or nr_of_answers is not a positive integer.\n",
"    \"\"\"\n",
"    if not isinstance(question, str) or not question.strip():\n",
"        raise ValueError(\"question must be a non-empty string\")\n",
"    # nr_of_answers ends up inside the KQL text, so force it to a plain integer.\n",
"    top_n = int(nr_of_answers)\n",
"    if top_n < 1:\n",
"        raise ValueError(\"nr_of_answers must be a positive integer\")\n",
"\n",
"    question_embedding = generate_embeddings(question)\n",
"\n",
"    kusto_query = f\"\"\"\n",
"    {KUSTO_TABLE} \n",
"    | extend similarity = series_cosine_similarity(dynamic({str(question_embedding)}), embedding) \n",
"    | top {top_n} by similarity desc\n",
"    | project content, metadata, similarity\n",
"    \"\"\"\n",
"\n",
"    kustoDf = (\n",
"        spark.read\n",
"        .format(\"com.microsoft.kusto.spark.synapse.datasource\")\n",
"        .option(\"kustoCluster\", KUSTO_URI)\n",
"        .option(\"kustoDatabase\", KUSTO_DATABASE)\n",
"        .option(\"accessToken\", accessToken)\n",
"        .option(\"kustoQuery\", kusto_query)\n",
"        .load()\n",
"    )\n",
"\n",
"    results = [row.asDict() for row in kustoDf.collect()]\n",
"    if not results:\n",
"        # Nothing to ground an answer on; do not ask the model to answer without records.\n",
"        return {\"answer\": \"No matching biospecimen records were found.\", \"sources\": []}\n",
"\n",
"    # Prepare context for LLM\n",
"    context_parts = []\n",
"    for position, record in enumerate(results, start=1):\n",
"        similarity = record['similarity']\n",
"        # A null similarity cannot be formatted as a number.\n",
"        score = \"n/a\" if similarity is None else f\"{similarity:.2f}\"\n",
"        context_parts.append(\n",
"            f\"Record {position} (Similarity: {score}):\\n{record['content']}\\nMetadata: {record['metadata']}\"\n",
"        )\n",
"    context = \"\\n\\n\".join(context_parts)\n",
"\n",
"    prompt = f\"\"\"\n",
"    You are a biomedical research assistant analyzing breast cancer biospecimen data.\n",
"    Answer the user's question based on the following records. Be precise and cite specific records when relevant.\n",
"    \n",
"    Question: {question}\n",
"    \n",
"    Relevant Biospecimen Records:\n",
"    {context}\n",
"    \"\"\"\n",
"\n",
"    response = client.chat.completions.create(\n",
"        model=OPENAI_GPT4_DEPLOYMENT_NAME,\n",
"        messages=[\n",
"            {\"role\": \"system\", \"content\": \"You are a knowledgeable biomedical research assistant.\"},\n",
"            {\"role\": \"user\", \"content\": prompt}\n",
"        ],\n",
"        temperature=0\n",
"    )\n",
"\n",
"    return {\n",
"        \"answer\": response.choices[0].message.content,\n",
"        \"sources\": results\n",
"    }\n",
"\n",
"# Initialize the system (run once)\n",
"# NOTE: store_embeddings() appends, so re-running this cell embeds the CSV again and\n",
"# writes every record to the Kusto table a second time.\n",
"embeddings_df = prepare_data()\n",
"store_embeddings(embeddings_df)\n",
"print(\"RAG system initialized successfully!\")\n"
],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":13,"statement_ids":[13],"state":"finished","livy_statement_state":"available","session_id":"7206fc12-bc87-410f-b591-2e3863c232ba","normalized_state":"finished","queued_time":"2025-04-02T03:13:13.5710306Z","session_start_time":null,"execution_start_time":"2025-04-02T03:13:13.5722882Z","execution_finish_time":"2025-04-02T03:13:13.9466356Z","parent_msg_id":"01922cad-6082-4853-b206-e279b9aebe88"},"text/plain":"StatementMeta(, 7206fc12-bc87-410f-b591-2e3863c232ba, 13, Finished, Available, Finished)"},"metadata":{}},{"output_type":"error","ename":"Py4JJavaError","evalue":"An error occurred while 
calling z:mssparkutils.credentials.getSecret.\n: com.microsoft.azure.trident.tokenlibrary.util.AkvHttpClientException: Invalid vault uri. Uri should match azure key vault URI like https://<keyVaultName>.vault.azure.net/\n\tat com.microsoft.azure.trident.tokenlibrary.util.AkvBasedSecretProviderClientImpl.invokeGetTarget(AkvBasedSecretProviderClient.scala:122)\n\tat com.microsoft.azure.trident.tokenlibrary.util.AkvBasedSecretProviderClientImpl.getAkvSecretWithAccessToken(AkvBasedSecretProviderClient.scala:153)\n\tat com.microsoft.azure.trident.tokenlibrary.TokenLibrary.getSecretWithToken(TokenLibrary.scala:806)\n\tat com.microsoft.azure.trident.tokenlibrary.TokenLibrary$.getSecretWithToken(TokenLibrary.scala:1359)\n\tat mssparkutils.credentials$.getSecret(credentials.scala:166)\n\tat mssparkutils.credentials.getSecret(credentials.scala)\n\tat java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\n\tat java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\n\tat java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\n\tat java.base/java.lang.reflect.Method.invoke(Method.java:566)\n\tat py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\n\tat py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:374)\n\tat py4j.Gateway.invoke(Gateway.java:282)\n\tat py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\n\tat py4j.commands.CallCommand.execute(CallCommand.java:79)\n\tat py4j.GatewayConnection.run(GatewayConnection.java:238)\n\tat java.base/java.lang.Thread.run(Thread.java:829)\n","traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mPy4JJavaError\u001b[0m                             Traceback (most recent call last)","Cell \u001b[0;32mIn[13], line 3\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[38;5;66;03m# Configuration\u001b[39;00m\n\u001b[1;32m      
2\u001b[0m OPENAI_GPT4_DEPLOYMENT_NAME \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mgpt-4\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m----> 3\u001b[0m OPENAI_DEPLOYMENT_ENDPOINT \u001b[38;5;241m=\u001b[39m mssparkutils\u001b[38;5;241m.\u001b[39mcredentials\u001b[38;5;241m.\u001b[39mgetSecret(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mopenai-scope\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mOPENAI_ENDPOINT\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m      4\u001b[0m OPENAI_API_KEY \u001b[38;5;241m=\u001b[39m mssparkutils\u001b[38;5;241m.\u001b[39mcredentials\u001b[38;5;241m.\u001b[39mgetSecret(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mopenai-scope\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mOPENAI_KEY\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m      5\u001b[0m OPENAI_ADA_EMBEDDING_DEPLOYMENT_NAME \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtext-embedding-ada-002\u001b[39m\u001b[38;5;124m\"\u001b[39m\n","File \u001b[0;32m~/cluster-env/trident_env/lib/python3.11/site-packages/notebookutils/mssparkutils/credentials.py:27\u001b[0m, in \u001b[0;36mgetSecret\u001b[0;34m(akvName, secret, linkedService)\u001b[0m\n\u001b[1;32m     25\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mgetSecret\u001b[39m(akvName, secret, linkedService\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m'\u001b[39m):\n\u001b[1;32m     26\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m linkedService \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m'\u001b[39m:\n\u001b[0;32m---> 27\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m creds\u001b[38;5;241m.\u001b[39mgetSecret(akvName, secret)\n\u001b[1;32m     28\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m     29\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m creds\u001b[38;5;241m.\u001b[39mgetSecret(akvName, 
secret, linkedService)\n","File \u001b[0;32m~/cluster-env/trident_env/lib/python3.11/site-packages/py4j/java_gateway.py:1322\u001b[0m, in \u001b[0;36mJavaMember.__call__\u001b[0;34m(self, *args)\u001b[0m\n\u001b[1;32m   1316\u001b[0m command \u001b[38;5;241m=\u001b[39m proto\u001b[38;5;241m.\u001b[39mCALL_COMMAND_NAME \u001b[38;5;241m+\u001b[39m\\\n\u001b[1;32m   1317\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcommand_header \u001b[38;5;241m+\u001b[39m\\\n\u001b[1;32m   1318\u001b[0m     args_command \u001b[38;5;241m+\u001b[39m\\\n\u001b[1;32m   1319\u001b[0m     proto\u001b[38;5;241m.\u001b[39mEND_COMMAND_PART\n\u001b[1;32m   1321\u001b[0m answer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgateway_client\u001b[38;5;241m.\u001b[39msend_command(command)\n\u001b[0;32m-> 1322\u001b[0m return_value \u001b[38;5;241m=\u001b[39m get_return_value(\n\u001b[1;32m   1323\u001b[0m     answer, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgateway_client, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtarget_id, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mname)\n\u001b[1;32m   1325\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m temp_arg \u001b[38;5;129;01min\u001b[39;00m temp_args:\n\u001b[1;32m   1326\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(temp_arg, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_detach\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n","File \u001b[0;32m/opt/spark/python/lib/pyspark.zip/pyspark/errors/exceptions/captured.py:179\u001b[0m, in \u001b[0;36mcapture_sql_exception.<locals>.deco\u001b[0;34m(*a, **kw)\u001b[0m\n\u001b[1;32m    177\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdeco\u001b[39m(\u001b[38;5;241m*\u001b[39ma: Any, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkw: Any) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Any:\n\u001b[1;32m    178\u001b[0m     
\u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 179\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m f(\u001b[38;5;241m*\u001b[39ma, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkw)\n\u001b[1;32m    180\u001b[0m     \u001b[38;5;28;01mexcept\u001b[39;00m Py4JJavaError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m    181\u001b[0m         converted \u001b[38;5;241m=\u001b[39m convert_exception(e\u001b[38;5;241m.\u001b[39mjava_exception)\n","File \u001b[0;32m~/cluster-env/trident_env/lib/python3.11/site-packages/py4j/protocol.py:326\u001b[0m, in \u001b[0;36mget_return_value\u001b[0;34m(answer, gateway_client, target_id, name)\u001b[0m\n\u001b[1;32m    324\u001b[0m value \u001b[38;5;241m=\u001b[39m OUTPUT_CONVERTER[\u001b[38;5;28mtype\u001b[39m](answer[\u001b[38;5;241m2\u001b[39m:], gateway_client)\n\u001b[1;32m    325\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m answer[\u001b[38;5;241m1\u001b[39m] \u001b[38;5;241m==\u001b[39m REFERENCE_TYPE:\n\u001b[0;32m--> 326\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m Py4JJavaError(\n\u001b[1;32m    327\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAn error occurred while calling \u001b[39m\u001b[38;5;132;01m{0}\u001b[39;00m\u001b[38;5;132;01m{1}\u001b[39;00m\u001b[38;5;132;01m{2}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39m\n\u001b[1;32m    328\u001b[0m         \u001b[38;5;28mformat\u001b[39m(target_id, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m, name), value)\n\u001b[1;32m    329\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m    330\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m Py4JError(\n\u001b[1;32m    331\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAn error occurred while calling \u001b[39m\u001b[38;5;132;01m{0}\u001b[39;00m\u001b[38;5;132;01m{1}\u001b[39;00m\u001b[38;5;132;01m{2}\u001b[39;00m\u001b[38;5;124m. 
Trace:\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{3}\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39m\n\u001b[1;32m    332\u001b[0m         \u001b[38;5;28mformat\u001b[39m(target_id, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m, name, value))\n","\u001b[0;31mPy4JJavaError\u001b[0m: An error occurred while calling z:mssparkutils.credentials.getSecret.\n: com.microsoft.azure.trident.tokenlibrary.util.AkvHttpClientException: Invalid vault uri. Uri should match azure key vault URI like https://<keyVaultName>.vault.azure.net/\n\tat com.microsoft.azure.trident.tokenlibrary.util.AkvBasedSecretProviderClientImpl.invokeGetTarget(AkvBasedSecretProviderClient.scala:122)\n\tat com.microsoft.azure.trident.tokenlibrary.util.AkvBasedSecretProviderClientImpl.getAkvSecretWithAccessToken(AkvBasedSecretProviderClient.scala:153)\n\tat com.microsoft.azure.trident.tokenlibrary.TokenLibrary.getSecretWithToken(TokenLibrary.scala:806)\n\tat com.microsoft.azure.trident.tokenlibrary.TokenLibrary$.getSecretWithToken(TokenLibrary.scala:1359)\n\tat mssparkutils.credentials$.getSecret(credentials.scala:166)\n\tat mssparkutils.credentials.getSecret(credentials.scala)\n\tat java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\n\tat java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\n\tat java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\n\tat java.base/java.lang.reflect.Method.invoke(Method.java:566)\n\tat py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\n\tat py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:374)\n\tat py4j.Gateway.invoke(Gateway.java:282)\n\tat py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\n\tat py4j.commands.CallCommand.execute(CallCommand.java:79)\n\tat 
py4j.GatewayConnection.run(GatewayConnection.java:238)\n\tat java.base/java.lang.Thread.run(Thread.java:829)\n"]}],"execution_count":5,"metadata":{"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"313fef4a-d707-40db-b34c-f6e1b84bac37"}],"metadata":{"kernel_info":{"name":"synapse_pyspark"},"kernelspec":{"name":"synapse_pyspark","language":"Python","display_name":"Synapse PySpark"},"language_info":{"name":"python"},"microsoft":{"language":"python","language_group":"synapse_pyspark","ms_spell_check":{"ms_spell_check_language":"en"}},"nteract":{"version":"nteract-front-end@1.0.0"},"spark_compute":{"compute_id":"/trident/default","session_options":{"conf":{"spark.synapse.nbs.session.timeout":"1200000"}}},"dependencies":{}},"nbformat":4,"nbformat_minor":5}