--- a +++ b/experiment/ocr_gemini.ipynb @@ -0,0 +1,134 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "C:\\Users\\hoang\\OneDrive\\Desktop\\nghich_prj\\Prescription-Text-Extraction-Pharmacy-Information-Retrieval\n" + ] + } + ], + "source": [ + "%cd C:\\Users\\hoang\\OneDrive\\Desktop\\nghich_prj\\Prescription-Text-Extraction-Pharmacy-Information-Retrieval" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "import google.generativeai as genai\n", + "import os\n", + "from dotenv import load_dotenv\n", + "from PIL import Image\n", + "\n", + "\n", + "load_dotenv()\n", + "genai.configure(api_key=os.environ[\"GEMINI_API_KEY\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "model = genai.GenerativeModel(\"gemini-1.5-flash\")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "PROMPT= \"\"\"\n", + "Extract information for all medications and instructions listed in the given prescription in the following format:\n", + "[\n", + " {\n", + " name: \"medication name\",\n", + " \"amount\": \"dosage\",\n", + " \"guide\": \"usage instructions, e.g., 1 tablet in the morning\",\n", + " \"note\": \"additional instructions\"\n", + " }\n", + "]\n", + "\n", + "Just return the desired format.\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "img = genai.upload_file(path=r\"C:\\Users\\hoang\\OneDrive\\Desktop\\nghich_prj\\Prescription-Text-Extraction-Pharmacy-Information-Retrieval\\assets\\ocr_sample\\bo-y-te-lui-lo-trinh-bat-buoc-benh-vien-hang-3-tro-len-phai-bo-ke-don-thuoc-viet-tay-202424227.jpg\", display_name=\"Prescription\")\n", + "res = model.generate_content([img, PROMPT])" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[\n", + " {\n", + " \"name\": \"Bitex beules\",\n", + " \"amount\": \"800mg\",\n", + " \"guide\": \"4 viên/ ngày\",\n", + " \"note\": \"sáng 2 viên, tối 2 viên\"\n", + " },\n", + " {\n", + " \"name\": \"Keralian\",\n", + " \"amount\": \"600\",\n", + " \"guide\": \"4 viên/ ngày\",\n", + " \"note\": \"sáng 2 viên, tối 2 viên\"\n", + " },\n", + " {\n", + " \"name\": \"Debelecol\",\n", + " \"amount\": \"600mg\",\n", + " \"guide\": \"15 ml\",\n", + " \"note\": \"trước bữa sáng\"\n", + " }\n", + "]\n" + ] + } + ], + "source": [ + "print(res.text)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "base", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}