--- a +++ b/request_handling_aws.py @@ -0,0 +1,90 @@ +import argparse +import io +import boto3 +from trp import Document + +# https://facesheet-ap.s3.amazonaws.com/000a5953-9b4a-4abd-9002-a1347ba949e2.png +client_text = boto3.client('textract') +client_s3 = boto3.client('s3') +client_dynamo = boto3.client('dynamodb') +med_comp_client = boto3.client("comprehendmedical") + +def get_text(bucket_name, key): + response = client_text.detect_document_text( Document={'S3Object': {'Bucket': bucket_name, 'Name': key}}) + doc_text = [] + for item in response["Blocks"]: + if item["BlockType"] == "LINE": + doc_text.append(item["Text"]) + return doc_text, key.split("/")[1].split(".")[0] + +def get_all_text(bucket_name, directory, event_list = None, require_form=False): + + full_text = [] + ids = [] + form_data = [] + + if event_list != None: + for s3_item in event_list: + next_doc, id = get_text(bucket_name, s3_item['s3']['object']['key']) + ids.append(id) + full_text += next_doc + if require_form == True: + form = get_text_analysis(bucket_name, s3_item['s3']['object']['key']) + form_data.append(form) + + else: + bucket_list = client_s3.list_objects_v2(Bucket = bucket_name, Prefix=directory) + for blob in bucket_list['Contents']: + if blob['Key'] != directory: + print("----------------------" + blob['Key'] + "---------------------------") + next_doc, id = get_text(bucket_name,blob['Key']) + print(id) + print(next_doc) + ids.append(id) + full_text += next_doc + # print(full_text) + for line in next_doc: + print (line) + if require_form == True: + form = get_text_analysis(bucket_name, blob['Key']) + form_data.append(form) + + if len(form_data) > 0: + return full_text, ids, form_data + else: + return full_text, ids + +def get_comprehend(text_block): + detection_map = med_comp_client.detect_entities_v2(Text='\n'.join(text_block)) + # out_map = {} + # for entity in detection_map["Entities"]: + # out_map[entity['Type']] = entity['Text'] + + # return out_map + return detection_map["Entities"] + +def get_text_analysis(bucket_name, key): + response_analysis = client_text.analyze_document( Document={'S3Object': {'Bucket': bucket_name, 'Name': key}}, FeatureTypes=['FORMS']) + extract = Document(response_analysis) + form_ext = [] + for page in extract.pages: + print("Key Value Pairs:") + for headings in page.form.fields: + print("Detected Key: {}, Detected Value: {}".format(headings.key, headings.value)) + form_ext.append((str(headings.key), str(headings.value))) + + return form_ext + +def put_dynamo(table_name, df, ids): + put_dict = {} + list_records = df.to_dict("records") + print(ids) + for dict,id in zip(list_records,ids): + for key, val in dict.items(): + if val != None: + put_dict[key] = {"S" : val} + + print(id) + put_dict["patient_id"] = {"S" : id} + client_dynamo.put_item(TableName=table_name, Item=put_dict) + print("Patient Item Stored. ID = " + id)