|
a |
|
b/request_handling.py |
|
|
1 |
import argparse |
|
|
2 |
from google.cloud import vision |
|
|
3 |
from google.cloud.vision import types |
|
|
4 |
import io |
|
|
5 |
from gcloud import storage |
|
|
6 |
|
|
|
7 |
def create_uri(bucket_name, blob_name):
    """Build the ``gs://`` URI that addresses a blob inside a bucket.

    Args:
        bucket_name: Name of the bucket, as a string.
        blob_name: Name (path) of the blob within the bucket, as a string.

    Returns:
        The string ``gs://<bucket_name>/<blob_name>``.
    """
    bucket_root = "gs://" + bucket_name
    return bucket_root + "/" + blob_name
|
|
9 |
|
|
|
10 |
def get_text(image_uri):
    """Run document text detection on the image located at ``image_uri``.

    Args:
        image_uri: Location of the image; it is assigned unchanged to
            ``image.source.image_uri`` of the request image.

    Returns:
        The response object returned by
        ``ImageAnnotatorClient.document_text_detection``.
    """
    client = vision.ImageAnnotatorClient()
    image = vision.types.Image()
    image.source.image_uri = image_uri
    # NOTE(review): the response is handed back without being inspected for
    # an error status -- confirm that callers cope with failed detections.
    return client.document_text_detection(image=image)
|
|
17 |
|
|
|
18 |
def get_all_text(bucket_name, directory, project='medical-extraction'):
    """Collect the detected text lines of every blob under a bucket prefix.

    For each blob a banner containing the blob name is printed, followed by
    each line of text detected in that blob.

    Args:
        bucket_name: Name of the storage bucket to read from.
        directory: Prefix passed to ``list_blobs`` to select the blobs.
        project: Project used to create the storage client. Defaults to
            ``'medical-extraction'``, the value that used to be hard-coded.

    Returns:
        A list with the text lines of all processed blobs, in listing order.
    """
    client = storage.Client(project=project)
    bucket = client.get_bucket(bucket_name)
    full_text = []

    for blob in bucket.list_blobs(prefix=directory):
        print("----------------------" + blob.name + "---------------------------")
        response = get_text(create_uri(bucket_name, blob.name))
        next_doc = response.full_text_annotation.text.splitlines()
        full_text.extend(next_doc)
        for line in next_doc:
            print(line)

    return full_text