Diff of /fetch_data/data_client.py [000000] .. [4f54f1]

Switch to unified view

a b/fetch_data/data_client.py
1
import os
2
3
from google.cloud import storage
4
import googleapiclient.discovery
5
6
from oauth2client.client import GoogleCredentials
7
8
9
# Application Default Credentials, resolved once at import time and reused by
# create_service() for every Cloud Storage API service object it builds.
credentials = GoogleCredentials.get_application_default()
10
11
12
def create_service():
    """Build a service object for the Cloud Storage API ('storage', 'v1').

    The module-level ``credentials`` are used to authorize the service.
    Other available API services and versions can be browsed at:
        http://g.co/dv/api-client-library/python/apis/
    """
    api_name, api_version = 'storage', 'v1'
    build_service = googleapiclient.discovery.build
    return build_service(api_name, api_version, credentials=credentials)
18
19
20
def list_bucket(bucket):
    """Collect the metadata entries of all objects stored in ``bucket``.

    Only the name, size, contentType and the ``my-key`` metadata value are
    requested for each object. Every result page is fetched and the entries
    of all pages are returned together in one list.
    """
    service = create_service()

    # Restrict the objects.list response to the fields we care about, plus
    # the page token that list_next() needs in order to continue.
    selector = 'nextPageToken,items(name,size,contentType,metadata(my-key))'

    found = []
    request = service.objects().list(bucket=bucket, fields=selector)

    # list_next() builds the follow-up request from the previous
    # request/response pair; the loop ends once it yields nothing.
    while request:
        page = request.execute()
        found += page.get('items', [])
        request = service.objects().list_next(request, page)

    return found
38
39
40
def collect_images(bucket_name, project_name, working_dir='./'):
    """Download every object of a bucket into ``working_dir``.

    Each object is written to ``os.path.join(working_dir, <object name>)``;
    missing parent directories are created. Objects whose target path
    already exists are skipped. A failed download is reported on stdout and
    does not stop the remaining downloads.

    Args:
        bucket_name: Name of the bucket whose objects are downloaded.
        project_name: Project passed to ``storage.Client``.
        working_dir: Local directory the objects are stored under.
    """
    blob_names = [item['name'] for item in list_bucket(bucket_name)]

    client = storage.Client(project=project_name)
    bucket = client.get_bucket(bucket_name)

    for blob_name in blob_names:
        blob = storage.Blob(blob_name, bucket)
        complete_path = os.path.join(working_dir, blob_name)
        dir_name = os.path.dirname(complete_path)

        # dir_name is '' when the target lives directly in the current
        # directory (e.g. working_dir=''); os.makedirs('') would raise.
        # exist_ok avoids the check-then-create race of a separate exists().
        if dir_name:
            os.makedirs(dir_name, exist_ok=True)

        if os.path.exists(complete_path):
            print("Skipping {} since it already exists.".format(blob_name))
            continue

        download_error = None
        with open(complete_path, 'wb') as file_obj:
            try:
                blob.download_to_file(file_obj)
            except Exception as e:
                # Best effort: remember the failure and carry on with the
                # next object once this file has been closed and cleaned up.
                download_error = e

        if download_error is None:
            print("Stored blob path: ", complete_path)
        else:
            # Remove the empty/partial file; otherwise the "already exists"
            # check above would skip this object on every later run and the
            # truncated file would be mistaken for a complete download.
            os.remove(complete_path)
            print("Downloading {} failed with {}.".format(
                complete_path, download_error))