Diff of /request_handling_aws.py [000000] .. [302778]

Switch to side-by-side view

--- a
+++ b/request_handling_aws.py
@@ -0,0 +1,90 @@
+import argparse
+import io
+import boto3
+from trp import Document
+
+# https://facesheet-ap.s3.amazonaws.com/000a5953-9b4a-4abd-9002-a1347ba949e2.png
+client_text = boto3.client('textract')
+client_s3 = boto3.client('s3')
+client_dynamo = boto3.client('dynamodb')
+med_comp_client = boto3.client("comprehendmedical")
+
+def get_text(bucket_name, key):
+    response = client_text.detect_document_text( Document={'S3Object': {'Bucket': bucket_name, 'Name': key}})
+    doc_text = []
+    for item in response["Blocks"]:
+        if item["BlockType"] == "LINE":
+            doc_text.append(item["Text"])
+    return doc_text, key.split("/")[1].split(".")[0]
+
+def get_all_text(bucket_name, directory, event_list = None, require_form=False):
+
+    full_text = []
+    ids = []
+    form_data = []
+
+    if event_list != None:
+        for s3_item in event_list:
+            next_doc, id = get_text(bucket_name, s3_item['s3']['object']['key'])
+            ids.append(id)
+            full_text += next_doc
+            if require_form == True:
+                form = get_text_analysis(bucket_name, s3_item['s3']['object']['key'])
+                form_data.append(form)
+
+    else:
+        bucket_list = client_s3.list_objects_v2(Bucket = bucket_name, Prefix=directory)
+        for blob in bucket_list['Contents']:
+            if blob['Key'] != directory:
+                print("----------------------" + blob['Key'] + "---------------------------")
+                next_doc, id = get_text(bucket_name,blob['Key'])
+                print(id)
+                print(next_doc)
+                ids.append(id)
+                full_text += next_doc
+                # print(full_text)
+                for line in next_doc:
+                    print (line)
+                if require_form == True:
+                    form = get_text_analysis(bucket_name, blob['Key'])
+                    form_data.append(form)
+
+    if len(form_data) > 0:
+        return full_text, ids, form_data
+    else:
+        return full_text, ids
+
+def get_comprehend(text_block):
+    detection_map = med_comp_client.detect_entities_v2(Text='\n'.join(text_block))
+    # out_map = {}
+    # for entity in detection_map["Entities"]:
+    #     out_map[entity['Type']] = entity['Text']
+
+    # return out_map
+    return detection_map["Entities"]
+
+def get_text_analysis(bucket_name, key):
+    response_analysis = client_text.analyze_document( Document={'S3Object': {'Bucket': bucket_name, 'Name': key}}, FeatureTypes=['FORMS'])
+    extract = Document(response_analysis)
+    form_ext = []
+    for page in extract.pages:
+        print("Key Value Pairs:")
+        for headings in page.form.fields:
+            print("Detected Key: {}, Detected Value: {}".format(headings.key, headings.value))
+            form_ext.append((str(headings.key), str(headings.value)))
+
+    return form_ext
+
+def put_dynamo(table_name, df, ids):
+    put_dict = {}
+    list_records = df.to_dict("records")
+    print(ids)
+    for dict,id in zip(list_records,ids):
+        for key, val in dict.items():
+            if val != None:
+                put_dict[key] = {"S" : val}
+
+        print(id)
+        put_dict["patient_id"] = {"S" : id}
+        client_dynamo.put_item(TableName=table_name, Item=put_dict)
+        print("Patient Item Stored. ID = " + id)