Skip to content

Commit

Permalink
Function name fix, using uuid1mc, time format fix, TaskRegistry fix (#60
Browse files Browse the repository at this point in the history
)

* function name fix, using uuid1mc, time format fix

* deleting keys from registry when deleting a file

* restructured TaskRegistry to use Redis Keys with TTL set to 60 seconds
instead of assigning them to a set

* re-adding nose and coverage to travis file, removed it by mistake

* using runtests.py file to run tests in travis

* reordered functions and better string formating

* made the redis key expiry time an argument

* assigning error handler the right way

* not resizing if the gif image is animated

* added pillow as dependency

* using redis pipeline to delete multiple keys

* missed one place to use redis pipeline

* deleted runtests.py, not needed anymore

* fixed typo

* fixed some import issues

* moved upload directory creation to manage.py

* added some comments

* moved importing app inside the only function that uses it

* Update runtests.sh

* added manage.py init to runtests

* removed mkdir from travis config

* fixed upload path issue

* moar fixes to registry, cleanup, moved back to uuid4

* fixed typo

* setting test environment variable in travis file

* setting env in existing block, otherwise it gets overwritten

* moved the init command out of if block

* rearranged travis config, cleanup of upload directory path
  • Loading branch information
Bibhas authored Jun 23, 2017
1 parent cc369e6 commit 380bb87
Show file tree
Hide file tree
Showing 14 changed files with 174 additions and 79 deletions.
16 changes: 10 additions & 6 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,23 @@ env:
SQ9lTuSD5jg3NSM8yMfMU58ppAYBgd+6VQP01wUPFrV/JSsbfgnVjMTm/Nbk
EGeHYvQUiyAg7zM4KdNJr6txj+jBE8MAeh7EwYNHoh9B7Vx//GxmXFnWyjXV
9cJkFroDW1Zfs2SZjLtzMQC8YXE30jmMxg+XHCQewKzRa4u1320=
# this is already being set in runtests.sh,
# but need to set it for `./manage.py init` too.
# could set it before running manage.py, but chose to put it here
- FLASK_ENV=TESTING
language: python
python:
- 2.7
services:
- redis-server
before_script:
- mkdir imgee/static/test_uploads
script:
- nosetests --with-coverage
install:
- sudo apt-get -qq install zlib1g-dev libfreetype6-dev liblcms1-dev libwebp-dev libjpeg-dev libpng-dev libfreetype6-dev libtiff4-dev librsvg2-dev ghostscript imagemagick pandoc
- sudo apt-get -qq install zlib1g-dev libfreetype6-dev liblcms1-dev libwebp-dev libjpeg-dev libpng-dev libfreetype6-dev libtiff4-dev librsvg2-dev ghostscript imagemagick pandoc inkscape
- pip install -r requirements.txt
- pip install nose coverage BeautifulSoup4 Pillow
- pip install -r test_requirements.txt
before_script:
- ./manage.py init # creates the test upload directory
script:
- ./runtests.sh
notifications:
email: false
slack:
Expand Down
18 changes: 5 additions & 13 deletions imgee/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,30 +22,22 @@
from .models import db
from .tasks import TaskRegistry

registry = TaskRegistry(os.getenv('ENV', 'production'))
registry = TaskRegistry()


def mkdir_p(dirname):
if not os.path.exists(dirname):
os.makedirs(dirname)


# Configure the app
# Configure the application
coaster.app.init_app(app)
migrate = Migrate(app, db)
baseframe.init_app(app, requires=['baseframe', 'picturefill', 'imgee'])
lastuser.init_app(app)
lastuser.init_usermanager(UserManager(db, models.User))
registry.init_app(app)


@app.errorhandler(403)
def error403(error):
return redirect(url_for('login'))

if app.config.get('MEDIA_DOMAIN') and (
app.config['MEDIA_DOMAIN'].startswith('http:') or
app.config['MEDIA_DOMAIN'].startswith('https:')):
if app.config.get('MEDIA_DOMAIN', '').lower().startswith(('http://', 'https://')):
app.config['MEDIA_DOMAIN'] = app.config['MEDIA_DOMAIN'].split(':', 1)[1]
mkdir_p(app.config['UPLOADED_FILES_DEST'])

registry.set_connection()
app.upload_folder = os.path.join(app.static_folder, app.config.get('UPLOADED_FILES_DIR'))
8 changes: 6 additions & 2 deletions imgee/models/stored_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,16 @@ def __repr__(self):
def extn(self):
return guess_extension(self.mimetype, self.orig_extn) or ''

@property
def filename(self):
return self.name + self.extn

def dict_data(self):
return dict(
title=self.title,
uploaded=self.created_at.strftime('%B %d, %Y'),
uploaded=self.created_at.isoformat() + 'Z',
filesize=app.jinja_env.filters['filesizeformat'](self.size),
imgsize='%s x %s' % (self.width, self.height),
imgsize=u'%s×%s px' % (self.width, self.height),
url=url_for('view_image', profile=self.profile.name, image=self.name),
thumb_url=url_for('get_image', image=self.name, size=app.config.get('THUMBNAIL_SIZE'))
)
Expand Down
52 changes: 34 additions & 18 deletions imgee/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from datetime import datetime, timedelta
from glob import glob
import os.path
import os
import re
from subprocess import check_call, CalledProcessError
import time
Expand All @@ -14,9 +15,9 @@
from imgee import app
from imgee.models import db, Thumbnail, StoredFile
from imgee.utils import (
newid, guess_extension, get_file_type,
newid, guess_extension, get_file_type, is_animated_gif,
path_for, get_s3_folder, get_s3_bucket,
download_frm_s3, get_width_height, ALLOWED_MIMETYPES,
download_from_s3, get_width_height, ALLOWED_MIMETYPES,
exists_in_s3, THUMBNAIL_COMMANDS
)

Expand All @@ -29,17 +30,25 @@ def get_resized_image(img, size, is_thumbnail=False):
"""
registry = imgee.registry
img_name = img.name

if img.mimetype == 'image/gif':
# if the gif file is animated, not resizing it for now
# but we will need to resize the gif, keeping animation intact
# https://github.com/hasgeek/imgee/issues/55
src_path = download_from_s3(img.filename)
if is_animated_gif(src_path):
return img.name

size_t = parse_size(size)
if (size_t and size_t[0] != img.width and size_t[1] != img.height) or ('thumb_extn' in ALLOWED_MIMETYPES[img.mimetype] and ALLOWED_MIMETYPES[img.mimetype]['thumb_extn'] != img.extn):
w_or_h = or_(Thumbnail.width == size_t[0], Thumbnail.height == size_t[1])
scaled = Thumbnail.query.filter(w_or_h, Thumbnail.stored_file == img).first()
if scaled and exists_in_s3(scaled):
img_name = scaled.name
else:
size = get_fitting_size((img.width, img.height), size_t)
original_size = (img.width, img.height)
size = get_fitting_size(original_size, size_t)
resized_filename = get_resized_filename(img, size)
registry = imgee.registry

try:
if resized_filename in registry:
# file is still being processed
Expand All @@ -48,12 +57,10 @@ def get_resized_image(img, size, is_thumbnail=False):
else:
registry.add(resized_filename)
img_name = resize_and_save(img, size, is_thumbnail=is_thumbnail)
except Exception as e:
# something broke while processing the file
raise e
finally:
# file has been processed, remove from registry
registry.remove(resized_filename)
if resized_filename in registry:
registry.remove(resized_filename)
return img_name


Expand All @@ -74,7 +81,7 @@ def save_file(fp, profile, title=None):

stored_file = save_img_in_db(name=id_, title=title, local_path=local_path,
profile=profile, mimetype=content_type, orig_extn=extn)
s3resp = save_on_s3(img_name, content_type=content_type)
save_on_s3(img_name, content_type=content_type)
return title, stored_file


Expand Down Expand Up @@ -121,7 +128,8 @@ def save_on_s3(filename, remotename='', content_type='', bucket='', folder=''):
headers = {
'Cache-Control': 'max-age=31536000', # 60*60*24*365
'Content-Type': get_file_type(fp, filename),
'Expires': datetime.now() + timedelta(days=365)
# once cached, it is set to expire after a year
'Expires': datetime.utcnow() + timedelta(days=365)
}
k.set_contents_from_file(fp, policy='public-read', headers=headers)
return filename
Expand All @@ -146,7 +154,7 @@ def parse_size(size):
return tuple(map(int, size))


def get_fitting_size((orig_w, orig_h), size):
def get_fitting_size(original_size, size):
"""
Return the size to fit the image to the box
along the smaller side and preserve aspect ratio.
Expand All @@ -171,6 +179,8 @@ def get_fitting_size((orig_w, orig_h), size):
>>> get_fitting_size((200, 500), (400, 600))
[240, 600]
"""
orig_w, orig_h = original_size

if orig_w == 0 or orig_h == 0:
# this is either a cdr file or a zero width file
# just go with target size
Expand Down Expand Up @@ -216,7 +226,7 @@ def resize_and_save(img, size, is_thumbnail=False):
Get the original image from local disk cache, download it from S3 if it misses.
Resize the image and save resized image on S3 and size details in db.
"""
src_path = download_frm_s3(img.name + img.extn)
src_path = download_from_s3(img.filename)

if 'thumb_extn' in ALLOWED_MIMETYPES[img.mimetype]:
format = ALLOWED_MIMETYPES[img.mimetype]['thumb_extn']
Expand Down Expand Up @@ -258,9 +268,10 @@ def resize_img(src, dest, size, mimetype, format, is_thumbnail):

def clean_local_cache(expiry=24):
"""
Remove files from local cache which are NOT accessed in the last `expiry` hours.
Remove files from local cache
which are NOT accessed in the last `expiry` hours.
"""
cache_path = app.config.get('UPLOADED_FILES_DEST')
cache_path = app.upload_folder
cache_path = os.path.join(cache_path, '*')
min_atime = time.time() - expiry*60*60

Expand All @@ -275,11 +286,16 @@ def clean_local_cache(expiry=24):
def delete(stored_file, commit=True):
"""
Delete all the thumbnails and images associated with a file, from local cache and S3.
Wait for the upload/resize to complete if queued for the same image.
"""
registry = imgee.registry
# remove all the keys related to the given file name
# this is delete all keys matching `imgee:registry:<name>*`
registry.remove_keys_starting_with(stored_file.name)

# remove locally
cache_path = app.config.get('UPLOADED_FILES_DEST')
cached_img_path = os.path.join(cache_path, '%s*' % stored_file.name)
cache_path = app.upload_folder
os.remove(os.path.join(cache_path, '%s' % stored_file.filename))
cached_img_path = os.path.join(cache_path, '%s_*' % stored_file.name)
for f in glob(cached_img_path):
os.remove(f)

Expand Down
88 changes: 69 additions & 19 deletions imgee/tasks.py
Original file line number Diff line number Diff line change
@@ -1,35 +1,85 @@
import re
import redis
from imgee import app


class InvalidRedisQueryException(Exception):
pass


class TaskRegistry(object):
def __init__(self, name='default', connection=None):
self.connection = redis.from_url(connection) if connection else None
def __init__(self, app=None, name='default', connection=None):
self.name = name
self.key = 'imgee:registry:%s' % name
self.connection = connection
self.key_prefix = 'imgee:registry:%s' % name
self.filename_pattern = r'^[a-z0-9_\.]+$'

def set_connection(self, connection=None):
connection = connection or app.config.get('REDIS_URL')
self.connection = redis.from_url(connection)
if app:
self.init_app(app)
else:
self.app = None

def add(self, taskid):
self.connection.sadd(self.key, taskid)
def init_app(self, app):
self.app = app

def remove(self, taskid):
self.connection.srem(self.key, taskid)
if not self.connection:
url = self.app.config.get('REDIS_URL')
self.connection = redis.from_url(url)
self.pipe = self.connection.pipeline()

def remove_all(self):
for k in self.get_all_keys():
self.remove(k)
def is_valid_query(self, query):
return bool(re.match(self.filename_pattern, query))

def key_for(self, taskid):
return u'{key_prefix}:{taskid}'.format(key_prefix=self.key_prefix, taskid=taskid)

def __contains__(self, taskid):
return self.connection.sismember(self.key, taskid)
return len(self.connection.keys(self.key_for(taskid))) > 0

def keys_starting_with(self, query):
return filter(lambda k: k.startswith(query), self.connection.smembers(self.key))
def add(self, taskid, expire=60):
self.connection.set(self.key_for(taskid), taskid)
# setting TTL of 60 seconds for the key
# if the file doesn't get processed within 60 seconds,
# it'll be removed from the registry
self.connection.expire(self.key_for(taskid), expire)

def search(self, query):
return filter(lambda k: str(query) in k, self.connection.smembers(self.key))
# >> KEYS imgee:registry:default:*query*
if not self.is_valid_query(query):
raise InvalidRedisQueryException(u'Invalid query for searching redis keys: {}'.format(query))
return self.connection.keys(self.key_for('*{}*'.format(query)))

def get_all_keys(self):
return list(self.connection.smembers(self.key))
# >> KEYS imgee:registry:default:*
return self.connection.keys(self.key_for('*'))

def keys_starting_with(self, query):
# >> KEYS imgee:registry:default:query_*
# chances are that we'll use this function to find the
# thumbnail keys, which look like "name_wNN_hNN", hence the _
if not self.is_valid_query(query):
raise InvalidRedisQueryException(u'Invalid query for searching redis keys, starting with: {}'.format(query))
return self.connection.keys(self.key_for('{}_*'.format(query)))

def remove(self, taskid):
# remove a key with the taskid
self.remove_by_key(self.key_for(taskid))

def remove_by_key(self, key):
# remove a redis key directly
self.connection.delete(key)

def remove_keys(self, keys):
for k in keys:
self.pipe.delete(k)
self.pipe.execute()

def remove_keys_matching(self, query):
self.remove_keys(self.search(query))

def remove_keys_starting_with(self, query):
# query will most likely be stored_file.name, So
# Delete keys for only `<query>` and `<query>_*`
self.remove_keys([self.key_for(query)] + self.keys_starting_with(query))

def remove_all(self):
self.remove_keys(self.get_all_keys())
Loading

0 comments on commit 380bb87

Please sign in to comment.