[4f54f1]: / fetch_data / data_client.py

Download this file

63 lines (45 with data), 2.2 kB

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import os
from google.cloud import storage
import googleapiclient.discovery
from oauth2client.client import GoogleCredentials
# Application-default Google credentials, looked up once when this module is
# imported and shared by every API client that create_service() builds.
credentials = GoogleCredentials.get_application_default()
def create_service():
    """Return a client for the Cloud Storage API ('storage', version 'v1').

    The client is built with the module-level ``credentials``. Other API
    names and versions that can be passed to ``build`` are listed at
    http://g.co/dv/api-client-library/python/apis/
    """
    api_name, api_version = 'storage', 'v1'
    return googleapiclient.discovery.build(
        api_name, api_version, credentials=credentials)
def list_bucket(bucket):
    """Collect the metadata of every object in ``bucket`` into one list.

    Sends an objects.list request and keeps following pages until
    ``list_next`` no longer returns a request. The 'items' entries of each
    response are accumulated in order; a response without 'items'
    contributes nothing.
    """
    storage_api = create_service()
    # Limit each response to the paging token and the per-object fields
    # named here.
    wanted_fields = (
        'nextPageToken,items(name,size,contentType,metadata(my-key))')
    request = storage_api.objects().list(bucket=bucket, fields=wanted_fields)
    collected = []
    # list_next() builds the request for the following page from the
    # previous request/response pair; the loop stops once it yields a
    # falsy value.
    while request:
        response = request.execute()
        collected += response.get('items', [])
        request = storage_api.objects().list_next(request, response)
    return collected
def collect_images(bucket_name, project_name, working_dir='./'):
    """Download every object of a bucket into ``working_dir``.

    Object names come from ``list_bucket``. Each object is written to
    ``os.path.join(working_dir, <object name>)``, creating intermediate
    directories when they are missing. A path that already exists locally
    is skipped, so the function can be re-run to pick up where a previous
    run stopped.

    Args:
        bucket_name: Name of the bucket whose objects are downloaded.
        project_name: Project passed to ``storage.Client``.
        working_dir: Local directory that receives the files.

    A failed download is reported on stdout and does not abort the loop.
    The partially written file is removed so that the "already exists"
    check does not mistake it for a finished download on the next run.
    Errors raised while creating directories or opening the destination
    file are not caught here.
    """
    blob_names = [item['name'] for item in list_bucket(bucket_name)]
    client = storage.Client(project=project_name)
    bucket = client.get_bucket(bucket_name)
    for blob_name in blob_names:
        complete_path = os.path.join(working_dir, blob_name)
        dir_name = os.path.dirname(complete_path)
        # dirname is '' when working_dir is empty and the object name has
        # no '/'; os.makedirs('') would raise, so only create real paths.
        if dir_name and not os.path.exists(dir_name):
            os.makedirs(dir_name)
        if os.path.exists(complete_path):
            print("Skipping {} since it already exists.".format(blob_name))
            continue
        blob = storage.Blob(blob_name, bucket)
        failure = None
        with open(complete_path, 'wb') as file_obj:
            try:
                blob.download_to_file(file_obj)
            except Exception as e:
                # Best effort per object: remember the error and carry on
                # with the remaining objects once the file is closed.
                failure = e
        if failure is None:
            print("Stored blob path: ", complete_path)
            continue
        # The file was created before the download started; drop the
        # empty/truncated leftover so a later run retries this object.
        try:
            os.remove(complete_path)
        except OSError as cleanup_error:
            print("Could not remove partial file {}: {}.".format(
                complete_path, cleanup_error))
        print("Downloading {} failed with {}.".format(complete_path, failure))