-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathflickr-set-downloader.py
222 lines (175 loc) · 7.69 KB
/
flickr-set-downloader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import argparse
import configparser
import functools
import logging
import math
import os
import pickle
import urllib
import urllib.error
import urllib.request

import flickrapi
import requests

import filesystem
# Log record layout: timestamp (left-aligned, at least 15 characters wide)
# followed by the message.
FORMAT = '%(asctime)-15s - %(message)s'
# Apply the layout through the logging module's basic configuration.
logging.basicConfig(format=FORMAT)
# Logger shared by the functions in this script; main() switches it to DEBUG
# when the --debug flag is given.
logger = logging.getLogger('flickr-set-downloader')
def first_line(string):
    """Return the first line of ``string``.

    Line boundaries are found with ``str.splitlines()``, so LF, CRLF and CR
    terminated text is treated the same way on every platform. Splitting on
    ``os.linesep`` would only recognise the host's native line ending.

    Args:
        string: Text that may span several lines.

    Returns:
        The text up to (not including) the first line break, or an empty
        string when ``string`` is empty.
    """
    lines = string.splitlines()
    # splitlines() yields an empty list for an empty string.
    return lines[0] if lines else ''
# The Flickr API can be flaky at times. This decorator retries a failing API
# call a few times; usually a single extra attempt per call is enough.
def retry(ExceptionsToCheck, tries=4):
    """Build a decorator that re-runs a call when it raises given exceptions.

    Args:
        ExceptionsToCheck: Exception class, or tuple of classes, that should
            trigger another attempt. Any other exception propagates at once.
        tries: Total number of attempts. The last attempt runs outside the
            ``try`` block, so whatever it raises reaches the caller.

    Returns:
        A decorator. The wrapped function keeps the name and docstring of
        the function it wraps.
    """
    def real_decorator(func):
        @functools.wraps(func)
        def f_retry(*args, **kwargs):
            # Every attempt except the last one swallows the listed
            # exceptions and reports how many attempts remain.
            for attempts_left in range(tries - 1, 0, -1):
                try:
                    return func(*args, **kwargs)
                except ExceptionsToCheck as e:
                    msg = "{} failed with '{}', retrying... ({} attempts left)" \
                        .format(func.__name__, first_line(str(e)), attempts_left)
                    print(msg)
            # Final attempt: no handler, failures propagate to the caller.
            return func(*args, **kwargs)
        return f_retry
    return real_decorator
# Exception types that are worth retrying. The tuple is handed to @retry,
# which makes another attempt when the decorated call raises one of them.
NETWORK_EXCEPTIONS = (requests.exceptions.BaseHTTPError,
requests.exceptions.ConnectionError,
flickrapi.exceptions.FlickrError,
urllib.error.HTTPError)
class AlbumDownloadSpec:
    """Plain record describing one photoset (album) to download.

    Holds the album's name, its identifier and a list of photo specs. The
    list starts out empty and is filled in after construction.
    """

    def __init__(self, name, identifier):
        # Each instance gets its own list.
        self.photos = []
        self.identifier = identifier
        self.name = name
class PhotoDownloadSpec:
    """Record describing one photo to download.

    Besides the photo's name, identifier and file type it keeps the API
    client so the download URL can be looked up when it is needed.
    """

    def __init__(self, flickr, name, identifier, filetype):
        self._flickr = flickr
        self.name, self.identifier, self.filetype = name, identifier, filetype

    @retry(NETWORK_EXCEPTIONS)
    def get_url(self):
        """Return the ``source`` attribute of the photo's "Original" size.

        Requests the available sizes for this photo and uses the first
        entry whose label is "Original". An IndexError is raised when the
        response contains no such entry.
        """
        response = self._flickr.photos.getSizes(photo_id=self.identifier)
        originals = response.findall('.//size[@label="Original"]')
        return originals[0].get('source')
def get_file_id(photoset_id, photo_id):
    """Combine a photoset id and a photo id into ``<photoset>-<photo>``."""
    parts = (str(photoset_id), str(photo_id))
    return '-'.join(parts)
def get_photo_filename(photo_name, filetype, idx, num_photos, photoset_title):
    """Return the relative path ``<photoset_title>/<index> - <name>.<type>``.

    The index is zero-padded to one digit more than the number of digits in
    ``num_photos``, so every file of a photoset gets an index of equal width.

    Args:
        photo_name: Title of the photo, used verbatim in the file name.
        filetype: File extension without the leading dot.
        idx: Position of the photo within the photoset.
        num_photos: Total number of photos in the photoset.
        photoset_title: Directory name the file name is joined onto.

    Returns:
        ``photoset_title`` joined with the generated file name.
    """
    # Count digits on the integer instead of using math.log10: log10 raises
    # ValueError for 0 and depends on float rounding. Results are identical
    # for every num_photos >= 1.
    width = len(str(max(int(num_photos), 1))) + 1
    filename = '{idx:0{width}d} - {photo_name}.{suffix}'.format(
        idx=idx, photo_name=photo_name, suffix=filetype, width=width)
    return os.path.join(photoset_title, filename)
def get_download_spec(config):
    """Build the list of album specs to download.

    ``config`` must provide the keys ``api_key``, ``api_secret`` and
    ``username``; they are used to create the API client. The result holds
    one album spec per photoset yielded by the client's ``walk_photosets()``,
    in iteration order.
    """
    client = flickrapi.FlickrAPI(
        config['api_key'],
        config['api_secret'],
        username=config['username'],
    )
    return [get_album_spec(client, photoset)
            for photoset in client.walk_photosets()]
@retry(NETWORK_EXCEPTIONS)
def get_album_spec(flickr, photoset):
    """Build the download spec for one photoset, including all its photos.

    The whole function is retried on network errors, so a retry starts over
    with a fresh, empty album spec.

    Args:
        flickr: API client; its ``walk_set`` method is used to list photos.
        photoset: Element describing the photoset. Its ``id`` attribute and
            the text of its ``title`` child are read.

    Returns:
        An AlbumDownloadSpec whose ``photos`` list has one entry per photo
        yielded by ``walk_set``.
    """
    photoset_id = photoset.get('id')
    photoset_title = photoset.find('title').text.strip()
    album_spec = AlbumDownloadSpec(photoset_title, photoset_id)

    print("Scanning photoset: {}".format(photoset_title))
    logger.debug("album identifier is %s", photoset_id)

    for photo in flickr.walk_set(photoset_id):
        album_spec.photos.append(get_photo_spec(flickr, photo))
    return album_spec
@retry(NETWORK_EXCEPTIONS)
def get_photo_spec(flickr, photo):
    """Build the download spec for a single photo.

    Args:
        flickr: API client; ``photos.getInfo`` is called to look up the
            photo's original file format.
        photo: Element describing the photo. Its ``id`` and ``title``
            attributes are read.

    Returns:
        A PhotoDownloadSpec carrying the client, title, id and the value of
        the ``originalformat`` attribute.
    """
    photo_id = photo.get('id')
    photo_name = photo.get('title')
    logger.debug("Found photo: %s - %s", photo_id, photo_name)

    # Take the first child by indexing. Element.getchildren() was removed
    # from xml.etree.ElementTree in Python 3.9.
    photo_info = flickr.photos.getInfo(photo_id=photo_id)[0]
    filetype = photo_info.get('originalformat')
    return PhotoDownloadSpec(flickr, photo_name, photo_id, filetype)
def download(working_directory, config):
    """Synchronise all photosets into ``working_directory``.

    Scans the account, creates one directory per album and registers every
    photo with the Filesystem helper together with a callback that downloads
    it. The filesystem state is saved even when the run is interrupted with
    Ctrl-C.

    Args:
        working_directory: Directory that receives the album folders.
        config: Mapping with the API credentials, passed on to
            ``get_download_spec``.
    """
    download_spec = get_download_spec(config)
    fs = filesystem.Filesystem(working_directory)
    try:
        for album in download_spec:
            dirname = os.path.join(working_directory, album.name)
            if not os.path.exists(dirname):
                print("Making directory {}".format(dirname))
                os.mkdir(dirname)
            num_photos = len(album.photos)
            for idx, photo in enumerate(album.photos, 1):
                filename = get_photo_filename(photo.name, photo.filetype, idx, num_photos, album.name)
                file_identifier = get_file_id(album.identifier, photo.identifier)
                fs.add(file_identifier, filename, _make_creator(photo))
        fs.finish_sync()
    except KeyboardInterrupt:
        # Deliberate: Ctrl-C stops the sync quietly; state is saved below.
        pass
    finally:
        print('Saving filesystem state')
        fs.save()


@retry(NETWORK_EXCEPTIONS)
def _fetch_url_to_path(url, path):
    """Download ``url`` to ``path``; retried on network errors."""
    urllib.request.urlretrieve(url, path)


def _make_creator(photo):
    """Return the callback that downloads ``photo`` to a given path.

    Creating the callback through a factory binds it to its own ``photo``.
    A closure defined directly in the loop would see whatever photo the loop
    variable holds at call time, which is wrong if the callback is invoked
    after the loop has moved on.
    """
    def creator(path, try_num=0):
        # try_num is unused here but kept so the callback signature is
        # unchanged for the Filesystem helper.
        print(" -- Downloading {}".format(path))
        _fetch_url_to_path(photo.get_url(), path)
    return creator
# Based on https://stackoverflow.com/a/11415816/265249
class writable_dir(argparse.Action):
    """argparse action that accepts only an existing, writable directory.

    On success the directory path is stored on the namespace under the
    argument's destination name.

    Raises:
        argparse.ArgumentTypeError: If the value is not a directory, or the
            directory is not writable by the current process.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        prospective_dir = values
        if not os.path.isdir(prospective_dir):
            raise argparse.ArgumentTypeError("{} is not a valid directory".format(prospective_dir))
        if not os.access(prospective_dir, os.W_OK):
            # The permission checked is write access, so the message must
            # say "writable" (it used to say "readable").
            raise argparse.ArgumentTypeError("{} is not a writable directory".format(prospective_dir))
        setattr(namespace, self.dest, prospective_dir)
def parse_arguments():
    """Parse the command line of the script.

    Defines the positional ``working_directory`` argument (validated by the
    ``writable_dir`` action) and the optional ``--debug`` flag. The long
    description is printed as-is by ``--help``.

    Returns:
        The namespace produced by ``ArgumentParser.parse_args()``.
    """
    # The config example must use the key "username": that is the option
    # name parse_configuration() reads.
    helptext = \
'''Flickr Photoset Backup and Downloader
=====================================
The script will maintain a folder with all your flickr photosets as subfolders,
which can be useful if you wish to backup the data you add to flickr.
Files will be named so that they sort the same way as you have organized your
files in the photoset.
If you run the script multiple times in the same folder only new files will
be downloaded.
Requirements
------------
This script requires the flickr API Python library. Install it by running
pip install flickrapi
Configuration
-------------
The script requires a config file named flickr-downloader.config in the
assigned working directory. Example contents:
[flickr]
username: your-username-here
api_key: your-api-key-here
api_secret: your-api-secret-here
You can generate an API key here: https://www.flickr.com/services/apps/create/apply/
'''
    parser = argparse.ArgumentParser(description=helptext, formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('working_directory', type=str, action=writable_dir,
                        help='Path to folder where photos should be stored')
    parser.add_argument('--debug', action='store_true',
                        help='Enable debugging output')
    return parser.parse_args()
def parse_configuration(working_directory):
    """Read the Flickr credentials from ``flickr-downloader.config``.

    Args:
        working_directory: Directory that contains the config file.

    Returns:
        A dict with the keys ``username``, ``api_key`` and ``api_secret``
        taken from the ``[flickr]`` section.

    Raises:
        SystemExit: With status 1, after printing a hint, when the
            ``[flickr]`` section or one of the options is missing. A missing
            config file ends up here too, because ``ConfigParser.read``
            silently skips files it cannot open.
    """
    config = configparser.ConfigParser()
    config.read(os.path.join(working_directory, 'flickr-downloader.config'))
    try:
        return {key: config.get('flickr', key)
                for key in ('username', 'api_key', 'api_secret')}
    except (configparser.NoSectionError, configparser.NoOptionError) as e:
        print('Error while reading config file: {}'.format(e.message))
        print('')
        print('For help, please see the readme file or execute script with --help argument')
        # Stop here: continuing would only fail later on the missing values.
        raise SystemExit(1)
def main():
    """Run the script: parse arguments, set up logging, load config, sync."""
    options = parse_arguments()
    if options.debug:
        logger.setLevel(logging.DEBUG)
        logger.debug("Logger level set to debug")
    workdir = options.working_directory
    settings = parse_configuration(workdir)
    download(workdir, settings)


# Only start the sync when the file is executed as a script.
if __name__ == "__main__":
    main()