From 380bb8717e064458730b2004d0d7563d79faacfd Mon Sep 17 00:00:00 2001 From: Bibhas Date: Fri, 23 Jun 2017 10:59:22 +0530 Subject: [PATCH] Function name fix, using uuid1mc, time format fix, TaskRegistry fix (#60) * function name fix, using uuid1mc, time format fix * deleting keys from registry when deleting a file * restructured TaskRegistry to use Redis Keys with TTL set to 60 seconds instead of assigning them to a set * re-adding nose and coverage to travis file, removed it by mistake * using runtests.py file to run tests in travis * reordered functions and better string formating * made the redis key expiry time an argument * assigning error handler the right way * not resizing if the gif image is animated * added pillow as dependency * using redis pipeline to delete multiple keys * missed one place to use redis pipeline * deleted runtests.py, not needed anymore * fixed typo * fixed some import issues * moved upload directory creation to manage.py * added some comments * moved importing app inside the only function that uses it * Update runtests.sh * added manage.py init to runtests * removed mkdir from travis config * fixed upload path issue * moar fixes to registry, cleanup, moved back to uuid4 * fixed typo * setting test environment variable in travis file * setting env in existing block, otherwise it gets overwritten * moved the init command out of if block * rearranged travis config, cleanup of upload directory path --- .travis.yml | 16 ++++--- imgee/__init__.py | 18 +++----- imgee/models/stored_file.py | 8 +++- imgee/storage.py | 52 ++++++++++++++-------- imgee/tasks.py | 88 +++++++++++++++++++++++++++++-------- imgee/utils.py | 23 +++++++--- instance/settings.py | 2 +- instance/testing.py | 5 ++- manage.py | 15 ++++++- requirements.txt | 9 ++-- runtests.py | 7 --- runtests.sh | 6 +++ test_requirements.txt | 2 + tests/fixtures.py | 2 + 14 files changed, 174 insertions(+), 79 deletions(-) delete mode 100755 runtests.py create mode 100755 runtests.sh create mode 100644 test_requirements.txt diff --git a/.travis.yml b/.travis.yml index d724fdf4..1b11eb41 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,19 +8,23 @@ env: SQ9lTuSD5jg3NSM8yMfMU58ppAYBgd+6VQP01wUPFrV/JSsbfgnVjMTm/Nbk EGeHYvQUiyAg7zM4KdNJr6txj+jBE8MAeh7EwYNHoh9B7Vx//GxmXFnWyjXV 9cJkFroDW1Zfs2SZjLtzMQC8YXE30jmMxg+XHCQewKzRa4u1320= + # this is already being set in runtests.sh, + # but need to set it for `./manage.py init` too. + # could set it before running manage.py, but chose to put it here + - FLASK_ENV=TESTING language: python python: - 2.7 services: - redis-server -before_script: - - mkdir imgee/static/test_uploads -script: - - nosetests --with-coverage install: - - sudo apt-get -qq install zlib1g-dev libfreetype6-dev liblcms1-dev libwebp-dev libjpeg-dev libpng-dev libfreetype6-dev libtiff4-dev librsvg2-dev ghostscript imagemagick pandoc + - sudo apt-get -qq install zlib1g-dev libfreetype6-dev liblcms1-dev libwebp-dev libjpeg-dev libpng-dev libfreetype6-dev libtiff4-dev librsvg2-dev ghostscript imagemagick pandoc inkscape - pip install -r requirements.txt - - pip install nose coverage BeautifulSoup4 Pillow + - pip install -r test_requirements.txt +before_script: + - ./manage.py init # creates the test upload directory +script: + - ./runtests.sh notifications: email: false slack: diff --git a/imgee/__init__.py b/imgee/__init__.py index 14f6b245..9b768989 100644 --- a/imgee/__init__.py +++ b/imgee/__init__.py @@ -22,30 +22,22 @@ from .models import db from .tasks import TaskRegistry -registry = TaskRegistry(os.getenv('ENV', 'production')) +registry = TaskRegistry() - -def mkdir_p(dirname): - if not os.path.exists(dirname): - os.makedirs(dirname) - - -# Configure the app +# Configure the application coaster.app.init_app(app) migrate = Migrate(app, db) baseframe.init_app(app, requires=['baseframe', 'picturefill', 'imgee']) lastuser.init_app(app) lastuser.init_usermanager(UserManager(db, models.User)) +registry.init_app(app) @app.errorhandler(403) def error403(error): return redirect(url_for('login')) -if app.config.get('MEDIA_DOMAIN') and ( - app.config['MEDIA_DOMAIN'].startswith('http:') or - app.config['MEDIA_DOMAIN'].startswith('https:')): +if app.config.get('MEDIA_DOMAIN', '').lower().startswith(('http://', 'https://')): app.config['MEDIA_DOMAIN'] = app.config['MEDIA_DOMAIN'].split(':', 1)[1] -mkdir_p(app.config['UPLOADED_FILES_DEST']) -registry.set_connection() +app.upload_folder = os.path.join(app.static_folder, app.config.get('UPLOADED_FILES_DIR')) diff --git a/imgee/models/stored_file.py b/imgee/models/stored_file.py index 892bf71f..f58cd6ff 100644 --- a/imgee/models/stored_file.py +++ b/imgee/models/stored_file.py @@ -58,12 +58,16 @@ def __repr__(self): def extn(self): return guess_extension(self.mimetype, self.orig_extn) or '' + @property + def filename(self): + return self.name + self.extn + def dict_data(self): return dict( title=self.title, - uploaded=self.created_at.strftime('%B %d, %Y'), + uploaded=self.created_at.isoformat() + 'Z', filesize=app.jinja_env.filters['filesizeformat'](self.size), - imgsize='%s x %s' % (self.width, self.height), + imgsize=u'%s×%s px' % (self.width, self.height), url=url_for('view_image', profile=self.profile.name, image=self.name), thumb_url=url_for('get_image', image=self.name, size=app.config.get('THUMBNAIL_SIZE')) ) diff --git a/imgee/storage.py b/imgee/storage.py index 3ed8244b..fd406389 100644 --- a/imgee/storage.py +++ b/imgee/storage.py @@ -3,6 +3,7 @@ from datetime import datetime, timedelta from glob import glob import os.path +import os import re from subprocess import check_call, CalledProcessError import time @@ -14,9 +15,9 @@ from imgee import app from imgee.models import db, Thumbnail, StoredFile from imgee.utils import ( - newid, guess_extension, get_file_type, + newid, guess_extension, get_file_type, is_animated_gif, path_for, get_s3_folder, get_s3_bucket, - download_frm_s3, get_width_height, ALLOWED_MIMETYPES, + download_from_s3, get_width_height, ALLOWED_MIMETYPES, exists_in_s3, THUMBNAIL_COMMANDS ) @@ -29,6 +30,15 @@ def get_resized_image(img, size, is_thumbnail=False): """ registry = imgee.registry img_name = img.name + + if img.mimetype == 'image/gif': + # if the gif file is animated, not resizing it for now + # but we will need to resize the gif, keeping animation intact + # https://github.com/hasgeek/imgee/issues/55 + src_path = download_from_s3(img.filename) + if is_animated_gif(src_path): + return img.name + size_t = parse_size(size) if (size_t and size_t[0] != img.width and size_t[1] != img.height) or ('thumb_extn' in ALLOWED_MIMETYPES[img.mimetype] and ALLOWED_MIMETYPES[img.mimetype]['thumb_extn'] != img.extn): w_or_h = or_(Thumbnail.width == size_t[0], Thumbnail.height == size_t[1]) @@ -36,10 +46,9 @@ def get_resized_image(img, size, is_thumbnail=False): if scaled and exists_in_s3(scaled): img_name = scaled.name else: - size = get_fitting_size((img.width, img.height), size_t) + original_size = (img.width, img.height) + size = get_fitting_size(original_size, size_t) resized_filename = get_resized_filename(img, size) - registry = imgee.registry - try: if resized_filename in registry: # file is still being processed @@ -48,12 +57,10 @@ def get_resized_image(img, size, is_thumbnail=False): else: registry.add(resized_filename) img_name = resize_and_save(img, size, is_thumbnail=is_thumbnail) - except Exception as e: - # something broke while processing the file - raise e finally: # file has been processed, remove from registry - registry.remove(resized_filename) + if resized_filename in registry: + registry.remove(resized_filename) return img_name @@ -74,7 +81,7 @@ def save_file(fp, profile, title=None): stored_file = save_img_in_db(name=id_, title=title, local_path=local_path, profile=profile, mimetype=content_type, orig_extn=extn) - s3resp = save_on_s3(img_name, content_type=content_type) + save_on_s3(img_name, content_type=content_type) return title, stored_file @@ -121,7 +128,8 @@ def save_on_s3(filename, remotename='', content_type='', bucket='', folder=''): headers = { 'Cache-Control': 'max-age=31536000', # 60*60*24*365 'Content-Type': get_file_type(fp, filename), - 'Expires': datetime.now() + timedelta(days=365) + # once cached, it is set to expire after a year + 'Expires': datetime.utcnow() + timedelta(days=365) } k.set_contents_from_file(fp, policy='public-read', headers=headers) return filename @@ -146,7 +154,7 @@ def parse_size(size): return tuple(map(int, size)) -def get_fitting_size((orig_w, orig_h), size): +def get_fitting_size(original_size, size): """ Return the size to fit the image to the box along the smaller side and preserve aspect ratio. @@ -171,6 +179,8 @@ def get_fitting_size((orig_w, orig_h), size): >>> get_fitting_size((200, 500), (400, 600)) [240, 600] """ + orig_w, orig_h = original_size + if orig_w == 0 or orig_h == 0: # this is either a cdr file or a zero width file # just go with target size @@ -216,7 +226,7 @@ def resize_and_save(img, size, is_thumbnail=False): Get the original image from local disk cache, download it from S3 if it misses. Resize the image and save resized image on S3 and size details in db. """ - src_path = download_frm_s3(img.name + img.extn) + src_path = download_from_s3(img.filename) if 'thumb_extn' in ALLOWED_MIMETYPES[img.mimetype]: format = ALLOWED_MIMETYPES[img.mimetype]['thumb_extn'] @@ -258,9 +268,10 @@ def resize_img(src, dest, size, mimetype, format, is_thumbnail): def clean_local_cache(expiry=24): """ - Remove files from local cache which are NOT accessed in the last `expiry` hours. + Remove files from local cache + which are NOT accessed in the last `expiry` hours. """ - cache_path = app.config.get('UPLOADED_FILES_DEST') + cache_path = app.upload_folder cache_path = os.path.join(cache_path, '*') min_atime = time.time() - expiry*60*60 @@ -275,11 +286,16 @@ def clean_local_cache(expiry=24): def delete(stored_file, commit=True): """ Delete all the thumbnails and images associated with a file, from local cache and S3. - Wait for the upload/resize to complete if queued for the same image. """ + registry = imgee.registry + # remove all the keys related to the given file name + # this is delete all keys matching `imgee:registry:*` + registry.remove_keys_starting_with(stored_file.name) + # remove locally - cache_path = app.config.get('UPLOADED_FILES_DEST') - cached_img_path = os.path.join(cache_path, '%s*' % stored_file.name) + cache_path = app.upload_folder + os.remove(os.path.join(cache_path, '%s' % stored_file.filename)) + cached_img_path = os.path.join(cache_path, '%s_*' % stored_file.name) for f in glob(cached_img_path): os.remove(f) diff --git a/imgee/tasks.py b/imgee/tasks.py index 1c257339..46f148db 100644 --- a/imgee/tasks.py +++ b/imgee/tasks.py @@ -1,35 +1,85 @@ +import re import redis -from imgee import app + + +class InvalidRedisQueryException(Exception): + pass class TaskRegistry(object): - def __init__(self, name='default', connection=None): - self.connection = redis.from_url(connection) if connection else None + def __init__(self, app=None, name='default', connection=None): self.name = name - self.key = 'imgee:registry:%s' % name + self.connection = connection + self.key_prefix = 'imgee:registry:%s' % name + self.filename_pattern = r'^[a-z0-9_\.]+$' - def set_connection(self, connection=None): - connection = connection or app.config.get('REDIS_URL') - self.connection = redis.from_url(connection) + if app: + self.init_app(app) + else: + self.app = None - def add(self, taskid): - self.connection.sadd(self.key, taskid) + def init_app(self, app): + self.app = app - def remove(self, taskid): - self.connection.srem(self.key, taskid) + if not self.connection: + url = self.app.config.get('REDIS_URL') + self.connection = redis.from_url(url) + self.pipe = self.connection.pipeline() - def remove_all(self): - for k in self.get_all_keys(): - self.remove(k) + def is_valid_query(self, query): + return bool(re.match(self.filename_pattern, query)) + + def key_for(self, taskid): + return u'{key_prefix}:{taskid}'.format(key_prefix=self.key_prefix, taskid=taskid) def __contains__(self, taskid): - return self.connection.sismember(self.key, taskid) + return len(self.connection.keys(self.key_for(taskid))) > 0 - def keys_starting_with(self, query): - return filter(lambda k: k.startswith(query), self.connection.smembers(self.key)) + def add(self, taskid, expire=60): + self.connection.set(self.key_for(taskid), taskid) + # setting TTL of 60 seconds for the key + # if the file doesn't get processed within 60 seconds, + # it'll be removed from the registry + self.connection.expire(self.key_for(taskid), expire) def search(self, query): - return filter(lambda k: str(query) in k, self.connection.smembers(self.key)) + # >> KEYS imgee:registry:default:*query* + if not self.is_valid_query(query): + raise InvalidRedisQueryException(u'Invalid query for searching redis keys: {}'.format(query)) + return self.connection.keys(self.key_for('*{}*'.format(query))) def get_all_keys(self): - return list(self.connection.smembers(self.key)) + # >> KEYS imgee:registry:default:* + return self.connection.keys(self.key_for('*')) + + def keys_starting_with(self, query): + # >> KEYS imgee:registry:default:query_* + # chances are that we'll use this function to find the + # thumbnail keys, which look like "name_wNN_hNN", hence the _ + if not self.is_valid_query(query): + raise InvalidRedisQueryException(u'Invalid query for searching redis keys, starting with: {}'.format(query)) + return self.connection.keys(self.key_for('{}_*'.format(query))) + + def remove(self, taskid): + # remove a key with the taskid + self.remove_by_key(self.key_for(taskid)) + + def remove_by_key(self, key): + # remove a redis key directly + self.connection.delete(key) + + def remove_keys(self, keys): + for k in keys: + self.pipe.delete(k) + self.pipe.execute() + + def remove_keys_matching(self, query): + self.remove_keys(self.search(query)) + + def remove_keys_starting_with(self, query): + # query will most likely be stored_file.name, So + # Delete keys for only `` and `_*` + self.remove_keys([self.key_for(query)] + self.keys_starting_with(query)) + + def remove_all(self): + self.remove_keys(self.get_all_keys()) diff --git a/imgee/utils.py b/imgee/utils.py index ad8593c2..2c7246a1 100644 --- a/imgee/utils.py +++ b/imgee/utils.py @@ -1,10 +1,9 @@ # -*- coding: utf-8 -*- import re -import os.path +import os from subprocess import check_output, CalledProcessError from urlparse import urljoin -from uuid import uuid4 from boto import connect_s3 from boto.s3.bucket import Bucket @@ -12,8 +11,9 @@ import defusedxml.cElementTree as elementtree from flask import request import magic +from PIL import Image -import imgee +from uuid import uuid4 from imgee import app THUMBNAIL_COMMANDS = { @@ -97,7 +97,7 @@ def get_file_url(scheme=None): def path_for(img_name): - return os.path.join(app.config['UPLOADED_FILES_DEST'], img_name) + return os.path.join(app.upload_folder, img_name) # -- mimetypes and content types @@ -131,6 +131,16 @@ def is_svg(fp): return tag == '{http://www.w3.org/2000/svg}svg' +def is_animated_gif(local_path): + is_animated = True + gif = Image.open(local_path) + try: + gif.seek(1) + except EOFError: + is_animated = False + return is_animated + + def get_file_type(fp, filename=None): fp.seek(0) data = fp.read() @@ -186,7 +196,7 @@ def exists_in_s3(thumb): return True -def download_frm_s3(img_name): +def download_from_s3(img_name): local_path = path_for(img_name) if not os.path.exists(local_path): bucket = get_s3_bucket() @@ -238,11 +248,12 @@ def get_no_previews_url(size): def get_image_url(image, size=None): + from imgee import storage extn = image.extn if size and extn in EXTNS: if image.no_previews: return get_no_previews_url(size) - img_name = imgee.storage.get_resized_image(image, size) + img_name = storage.get_resized_image(image, size) if not img_name: return get_no_previews_url(size) else: diff --git a/instance/settings.py b/instance/settings.py index 161fc352..e20edea9 100644 --- a/instance/settings.py +++ b/instance/settings.py @@ -33,7 +33,7 @@ #: Log file LOGFILE = 'error.log' #: File uploads -UPLOADED_FILES_DEST = 'imgee/static/uploads' +UPLOADED_FILES_DIR = 'uploads' # this will sit inside `app.static_folder` #: S3 Configuration AWS_ACCESS_KEY = 'Set aws access key here' AWS_SECRET_KEY = 'Set aws secret key here' diff --git a/instance/testing.py b/instance/testing.py index 54a6cf84..3a2430be 100644 --- a/instance/testing.py +++ b/instance/testing.py @@ -1,5 +1,8 @@ from os import environ -UPLOADED_FILES_DEST = 'imgee/static/test_uploads' + +# this will sit inside `app.static_folder` +UPLOADED_FILES_DIR = 'test_uploads' + AWS_FOLDER = 'test/' UNKNOWN_FILE_THUMBNAIL = 'unknown.jpeg' diff --git a/manage.py b/manage.py index 83393267..7c3aea72 100755 --- a/manage.py +++ b/manage.py @@ -1,12 +1,23 @@ #!/usr/bin/env python +import os -from coaster.manage import init_manager +from coaster.manage import manager, init_manager from imgee.models import db from imgee import app +def mkdir_p(dirname): + if not os.path.exists(dirname): + os.makedirs(dirname) + + +@manager.command +def init(): + mkdir_p(app.upload_folder) + if __name__ == "__main__": db.init_app(app) - manager = init_manager(app, db) + init_manager(app, db) + manager.run() diff --git a/requirements.txt b/requirements.txt index 8be7135d..6a81bea2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,9 @@ -Flask==0.10.1 +Flask https://github.com/hasgeek/coaster/zipball/master https://github.com/hasgeek/baseframe/zipball/master -SQLAlchemy==1.1.5 -SQLAlchemy-Utils==0.32.12 -Flask-SQLAlchemy==2.1 +SQLAlchemy +SQLAlchemy-Utils +Flask-SQLAlchemy psycopg2 https://github.com/hasgeek/flask-lastuser/zipball/master https://github.com/jace/flask-alembic/archive/master.zip @@ -13,3 +13,4 @@ xml4h python-magic defusedxml Flask-Migrate +pillow diff --git a/runtests.py b/runtests.py deleted file mode 100755 index b081e4c7..00000000 --- a/runtests.py +++ /dev/null @@ -1,7 +0,0 @@ -#!/usr/bin/env python -# -*- coding: iso-8859-15 -*- -import os -import nose - -os.environ['FLASK_ENV'] = 'testing' -nose.main() diff --git a/runtests.sh b/runtests.sh new file mode 100755 index 00000000..37ced6f7 --- /dev/null +++ b/runtests.sh @@ -0,0 +1,6 @@ +#!/bin/sh +set -e +export FLASK_ENV="TESTING" +export PYTHONPATH=$(dirname $0) # setting project root as PYTHONPATH +coverage run `which nosetests` "$@" +coverage report -m diff --git a/test_requirements.txt b/test_requirements.txt new file mode 100644 index 00000000..a6c88f59 --- /dev/null +++ b/test_requirements.txt @@ -0,0 +1,2 @@ +nose +coverage diff --git a/tests/fixtures.py b/tests/fixtures.py index e5aad2b6..b69eb2e2 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -1,3 +1,4 @@ +import os import unittest import random import string @@ -25,6 +26,7 @@ def get_test_profile(self): def setUp(self): app.config['TESTING'] = True app.testing = True + db.create_all() self.test_user_name = u'testuser' test_user = self.get_test_user(name=self.test_user_name)