Diff of /upload_images_aws.py [000000] .. [302778]

Switch to unified view

a b/upload_images_aws.py
1
from gcloud import storage
2
import argparse
3
from enum import Enum
4
import io
5
from google.cloud import vision
6
from google.cloud.vision import types
7
from PIL import Image, ImageDraw
8
import os
9
import tempfile
10
from pdf2image import convert_from_path, convert_from_bytes
11
import pdf2image
12
13
def _stack_pages(pages):
    """Return one image with *pages* pasted top-to-bottom.

    The canvas is as wide as the narrowest page, so wider pages are cropped
    on the right, and as tall as the sum of all page heights.
    """
    canvas_width = min(page.width for page in pages)
    canvas_height = sum(page.height for page in pages)
    merged = Image.new(pages[0].mode, (canvas_width, canvas_height))

    y = 0
    for page in pages:
        merged.paste(page, (0, y))
        y += page.height
    return merged


def convert_pdf(file_path, output_path=None):
    """Convert a JPEG, PNG or PDF file into a single image, saved as JPEG.

    The input kind is chosen from the file extension (case-insensitive):

    * ``.jpg`` / ``.jpeg``: the image is opened and re-saved at quality 80.
    * ``.png``: the image is flattened onto a white background so that
      transparent areas do not turn black in the JPEG.
    * anything else is handed to ``pdf2image.convert_from_path`` and all
      rendered pages are stacked vertically into one tall image.

    Args:
        file_path: Path of the file to convert.
        output_path: Where to write the resulting JPEG. When ``None`` the
            image is only returned and nothing is written.

    Returns:
        The resulting ``PIL.Image.Image``.

    Raises:
        ValueError: If the PDF conversion yields no pages.
    """
    print(file_path)

    # Dispatch on the real extension; a substring test such as
    # `".JPG" in file_path` misses ".jpg" and misfires on "scan.JPG.pdf".
    extension = os.path.splitext(file_path)[1].lower()

    if extension in ('.jpg', '.jpeg'):
        jpg = Image.open(file_path)
        if output_path is not None:
            jpg.save(output_path, 'JPEG', quality=80)
        return jpg

    if extension == '.png':
        # Converting to RGBA guarantees band 3 (alpha) exists even for
        # RGB, greyscale or palette PNGs, which would otherwise make
        # split()[3] raise IndexError.
        png = Image.open(file_path).convert('RGBA')

        background = Image.new("RGB", png.size, (255, 255, 255))
        background.paste(png, mask=png.split()[3])  # 3 is the alpha channel

        if output_path is not None:
            background.save(output_path, 'JPEG', subsampling=0, quality=100)
        return background

    # pdf2image writes the rendered pages into temp_dir; they must be read
    # (pasted) before the directory is removed, so merging happens inside
    # the `with` block.
    with tempfile.TemporaryDirectory() as temp_dir:
        pages = convert_from_path(file_path, output_folder=temp_dir)
        if not pages:
            raise ValueError(f'no pages could be rendered from {file_path!r}')
        try:
            merged_image = _stack_pages(pages)
        finally:
            # Release the file handles so the temp dir can be deleted.
            for page in pages:
                page.close()

    if output_path is not None:
        merged_image.save(output_path, 'JPEG', subsampling=0, quality=100)
    return merged_image
59
60
61
if __name__ == '__main__':
    # Batch-convert every file in data_path to a JPEG under data_path/Done.

    # data_path = '/Users/rhettd/Documents/Fall2019/MED_CONSULT/Data/fwdfacesheets/'
    data_path = '/Users/rhettd/Documents/Fall2019/MED_CONSULT/Data/XWP - ARCHANA WAGLE PC/'

    # List first so a missing data_path still fails loudly instead of being
    # silently created by makedirs below.
    file_names = os.listdir(data_path)

    done_dir = os.path.join(data_path, 'Done')
    os.makedirs(done_dir, exist_ok=True)

    # Cloud upload is currently disabled; only the local conversion runs.
    # client = storage.Client(project='medical-extraction')
    # bucket = client.get_bucket('report-ap')

    for file_name in file_names:
        # Skip the macOS metadata file and the output directory itself.
        if file_name in ('.DS_Store', 'Done'):
            continue

        # splitext strips only the final extension, so "a.b.pdf" becomes
        # "a.b" rather than colliding with every other "a.*" file.
        image_name = os.path.splitext(file_name)[0]
        output_path = os.path.join(done_dir, image_name + '.jpg')

        convert_pdf(os.path.join(data_path, file_name), output_path)

        # blob = bucket.blob("face_sheet_images/" + image_name + '.jpg')
        # blob.upload_from_filename(data_path + "Done/"+ image_name+'.jpg')