medical_extraction / Git / [302778] /upload

Models:
philipB/
medical_extraction
Downloads: 1
[302778]: / upload_images.py
History
Download this file
77 lines (63 with data), 2.7 kB

from gcloud import storage
import argparse
from enum import Enum
import io
from google.cloud import vision
from google.cloud.vision import types
from PIL import Image, ImageDraw
import os
import tempfile
from pdf2image import convert_from_path, convert_from_bytes
import pdf2image

def convert_pdf(file_path, output_path=None):
    """Convert a JPEG, PNG or PDF file into a single image saved as JPEG.

    The input type is chosen from the file extension (case-insensitive):

    * ``.jpg`` / ``.jpeg`` -- the image is re-saved as JPEG (quality 80).
    * ``.png`` -- the image is flattened onto a white background (JPEG has
      no alpha channel) and saved as JPEG (quality 80).
    * anything else -- treated as a PDF: every page is rendered with
      ``pdf2image`` and the pages are stacked vertically into one image.

    Args:
        file_path: Path of the file to convert.
        output_path: Where to write the resulting JPEG.  When ``None`` the
            image is only returned and nothing is written.

    Returns:
        The resulting ``PIL.Image.Image``, fully loaded in memory.

    Raises:
        ValueError: If the PDF renders to zero pages.
    """
    print(file_path)
    # Compare the real extension, ignoring case, instead of a substring
    # match, so "scan.jpg" and "scan.JPG" behave the same and a name such
    # as "scan.JPG.pdf" is still handled as a PDF.
    extension = os.path.splitext(file_path)[1].lower()

    if extension in ('.jpg', '.jpeg'):
        with Image.open(file_path) as source:
            # copy() forces the pixel data to be read so the file handle
            # can be closed right away.
            jpg = source.copy()
        if output_path is not None:
            jpg.save(output_path, 'JPEG', quality=80)
        return jpg

    if extension == '.png':
        with Image.open(file_path) as source:
            # Normalise to RGBA so an alpha band always exists; PNGs saved
            # without transparency would otherwise have no fourth band.
            png = source.convert('RGBA')

        background = Image.new("RGB", png.size, (255, 255, 255))
        background.paste(png, mask=png.split()[3])  # 3 is the alpha channel

        if output_path is not None:
            background.save(output_path, 'JPEG', quality=80)
        return background

    # Render the PDF pages into a temporary directory that is removed as
    # soon as the pages have been read into memory.
    with tempfile.TemporaryDirectory() as temp_dir:
        pages = convert_from_path(file_path, output_folder=temp_dir)
        if not pages:
            raise ValueError(f'no pages could be rendered from {file_path}')

        # Images backed by files in temp_dir are loaded lazily, so copy the
        # pixel data into memory and release the handles before the
        # directory is deleted.
        imgs = []
        for page in pages:
            imgs.append(page.copy())
            page.close()

    # The merged image is as wide as the narrowest page (wider pages are
    # cropped on the right) and as tall as all pages together.
    min_img_width = min(img.width for img in imgs)
    total_height = sum(img.height for img in imgs)
    merged_image = Image.new(imgs[0].mode, (min_img_width, total_height))

    # Paste the pages one below the other.
    y = 0
    for img in imgs:
        merged_image.paste(img, (0, y))
        y += img.height

    if output_path is not None:
        merged_image.save(output_path, 'JPEG')
    return merged_image


if __name__ == '__main__':
    # Convert every file in data_path to a JPEG under data_path/Done and
    # upload the result to the "report-ap" bucket.

    # Alternative input directory used previously:
    # data_path = '/Users/rhettd/Documents/Fall2019/MED_CONSULT/Data/fwdfacesheets/'
    data_path = '/Users/rhettd/Documents/Fall2019/MED_CONSULT/Data/CAA - CHICAGO ANESTHESIA ASSOCIATES SC/Single/'

    # Converted images are written here; create it so a fresh data
    # directory does not make the first save fail.
    done_path = os.path.join(data_path, 'Done')
    os.makedirs(done_path, exist_ok=True)

    client = storage.Client(project='medical-extraction')
    bucket = client.get_bucket('report-ap')

    for file_name in os.listdir(data_path):
        source_path = os.path.join(data_path, file_name)
        # Skip the macOS metadata file and every directory (including the
        # "Done" output directory).
        if file_name == ".DS_Store" or not os.path.isfile(source_path):
            continue

        # Strip only the final extension so names containing dots
        # (e.g. "a.1.pdf" and "a.2.pdf") do not collapse to the same name.
        image_name = os.path.splitext(file_name)[0]
        local_jpg = os.path.join(done_path, image_name + '.jpg')

        convert_pdf(source_path, local_jpg)

        blob = bucket.blob("face_sheet_images/" + image_name + '.jpg')
        blob.upload_from_filename(local_jpg)