From 441629fa754b2aab22fcdb855002d63de7b1a6b9 Mon Sep 17 00:00:00 2001 From: Bibhas Date: Fri, 7 Apr 2017 10:17:10 +0530 Subject: [PATCH 01/36] removed celery as dependency --- .gitignore | 3 + alembic/versions/2d5db2a698f6_init.py | 3 +- .../347ba3ac054f_update_image_attrs.py | 11 +- imgee/__init__.py | 11 +- imgee/api.py | 60 ---------- imgee/async.py | 64 ++-------- imgee/forms.py | 25 ++-- imgee/models/stored_file.py | 2 - imgee/storage.py | 110 +++++++----------- imgee/templates/profile.html | 36 +++--- imgee/utils.py | 63 +++++++--- imgee/views/index.py | 41 ++----- instance/settings.py | 6 +- instance/testing.py | 2 - requirements.txt | 15 ++- runcelery.py | 14 --- runceleryprod.sh | 2 - website.py | 4 +- 18 files changed, 170 insertions(+), 302 deletions(-) delete mode 100644 imgee/api.py delete mode 100644 runcelery.py delete mode 100755 runceleryprod.sh diff --git a/.gitignore b/.gitignore index b3309b74..500b6a01 100644 --- a/.gitignore +++ b/.gitignore @@ -20,3 +20,6 @@ baseframe-packed.css baseframe-packed.js error.log imgee/static/uploads +*.bak +instance/production.env.sh +imgee/static/gen diff --git a/alembic/versions/2d5db2a698f6_init.py b/alembic/versions/2d5db2a698f6_init.py index e62c68f4..98bef630 100644 --- a/alembic/versions/2d5db2a698f6_init.py +++ b/alembic/versions/2d5db2a698f6_init.py @@ -39,8 +39,6 @@ def upgrade(): sa.Column('created_at', sa.DateTime(), nullable=False), sa.Column('updated_at', sa.DateTime(), nullable=False), sa.Column('userid', sa.Unicode(length=22), nullable=False), - sa.Column('description', sa.UnicodeText(), nullable=False), - sa.Column('type', sa.Integer(), nullable=False), sa.Column('name', sa.Unicode(length=250), nullable=False), sa.Column('title', sa.Unicode(length=250), nullable=False), sa.PrimaryKeyConstraint('id'), @@ -63,6 +61,7 @@ def upgrade(): sa.Column('created_at', sa.DateTime(), nullable=False), sa.Column('updated_at', sa.DateTime(), nullable=False), sa.Column('name', sa.String(length=50), nullable=False), + sa.Column('title', sa.String(length=50), nullable=False), sa.Column('profile_id', sa.Integer(), nullable=False), sa.ForeignKeyConstraint(['profile_id'], ['profile.id'], ), sa.PrimaryKeyConstraint('id') diff --git a/alembic/versions/347ba3ac054f_update_image_attrs.py b/alembic/versions/347ba3ac054f_update_image_attrs.py index dc8efc12..aba71953 100644 --- a/alembic/versions/347ba3ac054f_update_image_attrs.py +++ b/alembic/versions/347ba3ac054f_update_image_attrs.py @@ -14,6 +14,10 @@ import os.path import sys from glob import glob +from alembic import op +from sqlalchemy.sql import select +from sqlalchemy.orm.session import sessionmaker +from sqlalchemy.orm import load_only sys.path.append('../../') from imgee import db @@ -22,7 +26,10 @@ def upgrade(): - imgs = StoredFile.query.filter_by(size=None) + connection = op.get_bind() + Session = sessionmaker(bind=connection.engine) + session = Session(bind=connection) + imgs = session.query(StoredFile).filter_by(size=None).options(load_only("id", "name", "title")) for img in imgs: path = path_for(img.name) + '.*' @@ -34,7 +41,7 @@ def upgrade(): print 'updated attributes of %s\n' % img.title, else: print 'local file not found for %s\n' % img.title, - db.session.commit() + session.commit() def downgrade(): diff --git a/imgee/__init__.py b/imgee/__init__.py index 701283ba..5ee9da53 100644 --- a/imgee/__init__.py +++ b/imgee/__init__.py @@ -3,7 +3,6 @@ # The imports in this file are order-sensitive import os -from celery import Celery from flask import Flask, redirect, url_for from flask.ext.lastuser import Lastuser @@ -12,20 +11,18 @@ import coaster.app from ._version import __version__ - version = Version(__version__) app = Flask(__name__, instance_relative_config=True) lastuser = Lastuser() -celery = Celery() assets['imgee.css'][version] = 'css/app.css' from . import models, views from .models import db -from .api import api from .async import TaskRegistry -registry = TaskRegistry() +registry = TaskRegistry(os.getenv('ENV', 'production')) + def mkdir_p(dirname): if not os.path.exists(dirname): @@ -40,7 +37,7 @@ def error403(error): def init_for(env): coaster.app.init_app(app, env) baseframe.init_app(app, requires=['baseframe', 'picturefill', 'imgee']) - app.error_handlers[403] = error403 + app.error_handlers[403] = error403 lastuser.init_app(app) lastuser.init_usermanager(UserManager(db, models.User)) if app.config.get('MEDIA_DOMAIN') and ( @@ -48,6 +45,4 @@ def init_for(env): app.config['MEDIA_DOMAIN'].startswith('https:')): app.config['MEDIA_DOMAIN'] = app.config['MEDIA_DOMAIN'].split(':', 1)[1] mkdir_p(app.config['UPLOADED_FILES_DEST']) - celery.conf.add_defaults(app.config) registry.set_connection() - app.register_blueprint(api, url_prefix='/api/1') diff --git a/imgee/api.py b/imgee/api.py deleted file mode 100644 index 29187605..00000000 --- a/imgee/api.py +++ /dev/null @@ -1,60 +0,0 @@ -from flask import jsonify, request, Blueprint, url_for -from coaster.views import load_model, load_models -import os -from urlparse import urljoin - -from imgee import app, lastuser -from imgee.models import db, StoredFile, Profile -import async, utils, storage - -api = Blueprint('api', __name__) - -class Status(object): - ok = 'OK' - in_process = 'PROCESSING' - notfound = 'NOT FOUND' - -@api.errorhandler(404) -def error404(error): - return jsonify({"status": Status.notfound, "status_code": 404}) - - -@api.route('/file/.json') -@load_model(StoredFile, {'name': 'image'}, 'image') -def get_image_json(image): - size = request.args.get('size') - try: - url = utils.get_image_url(image, size) - except async.StillProcessingException as e: - imgname = e.args[0] - url = utils.get_url(imgname) - status = Status.in_process - else: - status = Status.ok - imgee_url = urljoin(request.host_url, url_for('get_image', image=image.name, size=size)) - - d = dict(url=url, status=status, imgee_url=imgee_url, status_code=200) - return jsonify(d) - - -@api.route('//new.json', methods=['POST']) -@load_model(Profile, {'name': 'profile'}, 'profile') -@lastuser.resource_handler('imgee/new') -def upload_file_json(callerinfo, profile): - file_ = request.files['file'] - title = request.form.get('title') - title, job = storage.save(file_, profile=profile, title=title) - try: - imgname = async.get_async_result(job) - except async.StillProcessingException as e: - imgname = e.args[0] - status = Status.in_process - else: - status = Status.ok - - url = utils.get_url(imgname) - imgname = os.path.splitext(imgname)[0] - imgee_url = urljoin(request.host_url, url_for('get_image', image=imgname)) - d = dict(url=url, status=status, imgee_url=imgee_url, status_code=200) - return jsonify(d) - diff --git a/imgee/async.py b/imgee/async.py index 0684b97c..79efc68a 100644 --- a/imgee/async.py +++ b/imgee/async.py @@ -1,33 +1,20 @@ import redis -from celery import Task -import celery.states -from celery.result import AsyncResult, EagerResult from flask import url_for, redirect, current_app, make_response import time import imgee -from imgee import app +from imgee import app, storage, utils from imgee.models import db -import storage, utils + def now_in_secs(): return int(time.time()) + def get_taskid(funcname, imgname): return "{f}:{n}".format(f=funcname, n=imgname) -class BaseTask(celery.Task): - abstract = True - def after_return(self, status, retval, task_id, args, kwargs, einfo): - # even if the task fails remove task_id so that on next request the task is executed. - imgee.registry.remove(task_id) - - def on_failure(self, exc, task_id, args, kwargs, einfo): - super(BaseTask, self).on_failure(exc, task_id, args, kwargs, einfo) - db.session.rollback() - - class TaskRegistry(object): def __init__(self, name='default', connection=None): self.connection = redis.from_url(connection) if connection else None @@ -44,40 +31,24 @@ def add(self, taskid): def remove(self, taskid): self.connection.srem(self.key, taskid) + def remove_all(self): + for k in self.get_all_keys(): + self.remove(k) + def __contains__(self, taskid): return self.connection.sismember(self.key, taskid) def keys_starting_with(self, exp): return [k for k in self.connection.smembers(self.key) if k.startswith(exp)] + def get_all_keys(self): + return [k for k in self.connection.smembers(self.key)] + def is_queued_for_deletion(self, imgname): taskid = get_taskid('delete', imgname) return taskid in self -def queueit(funcname, *args, **kwargs): - """ - Execute `funcname` function with `args` and `kwargs` if CELERY_ALWAYS_EAGER is True. - Otherwise, check if it's queued already in `TaskRegistry`. If not, add it to `TaskRegistry` and queue it. - """ - - func = getattr(storage, funcname) - taskid = get_taskid(funcname, kwargs.pop('taskid')) - if app.config.get('CELERY_ALWAYS_EAGER'): - return func(*args, **kwargs) - else: - # check it in the registry. - if taskid in imgee.registry: - job = AsyncResult(taskid, app=imgee.celery) - if job.status == celery.states.SUCCESS: - return job.result - else: - # add in the registry and enqueue the job - imgee.registry.add(taskid) - job = func.apply_async(args=args, kwargs=kwargs, task_id=taskid) - return job - - def loading(): """ Returns the `LOADING_IMG` as the content of the response. @@ -90,18 +61,3 @@ def loading(): class StillProcessingException(Exception): pass - - -def get_async_result(job): - """ - If the result of the `job` is not yet ready, return that else raise StillProcessingException. - If the input is `str` instead, return that. - """ - if isinstance(job, AsyncResult): - if job.status == celery.states.SUCCESS: - return job.result - else: - img_name = job.task_id.split(':')[1] - raise StillProcessingException(img_name) - elif isinstance(job, (str, unicode)): - return job diff --git a/imgee/forms.py b/imgee/forms.py index 7785903a..27ce85f4 100644 --- a/imgee/forms.py +++ b/imgee/forms.py @@ -2,7 +2,7 @@ import os.path from coaster import make_name -from flask.ext.wtf import Form +from flask_wtf import FlaskForm from wtforms.validators import Required, ValidationError, Length from wtforms import (FileField, TextField, HiddenField, SelectMultipleField, SelectField) @@ -13,21 +13,22 @@ def valid_file(form, field): - if not is_file_allowed(field.data.stream): + if not is_file_allowed(field.data.stream, field.data.mimetype, field.data.filename): raise ValidationError("Sorry, unknown image format. Please try uploading another file.") -class UploadImageForm(Form): +class UploadImageForm(FlaskForm): file = FileField("File", validators=[Required(), valid_file]) -class DeleteImageForm(Form): +class DeleteImageForm(FlaskForm): pass -class PurgeCacheForm(Form): +class PurgeCacheForm(FlaskForm): pass + def reserved_words(): """get all words which can't be used as labels""" words = [] @@ -49,31 +50,31 @@ def label_doesnt_exist(form, field): raise ValidationError('Label "%s" already exists. Please try another name.' % field.data) -class CreateLabelForm(Form): +class CreateLabelForm(FlaskForm): label = TextField('Label', validators=[Required(), Length(max=250), label_doesnt_exist]) profile_id = HiddenField('profile_id') -class AddLabelForm(Form): +class AddLabelForm(FlaskForm): stored_file_id = HiddenField('stored_file_id') labels = HiddenField('labels') -class RemoveLabelForm(Form): +class RemoveLabelForm(FlaskForm): pass -class EditTitleForm(Form): +class EditTitleForm(FlaskForm): file_name = HiddenField('file_name') file_title = TextField('title', validators=[Required(), Length(max=250)]) -class UpdateTitle(Form): +class UpdateTitle(FlaskForm): title = TextField('Title', validators=[Required(), Length(max=250)]) -class EditLabelForm(Form): +class EditLabelForm(FlaskForm): label_name = TextField('label', validators=[Required(), Length(max=250)]) -class ChangeProfileForm(Form): +class ChangeProfileForm(FlaskForm): profiles = SelectField('Profiles') diff --git a/imgee/models/stored_file.py b/imgee/models/stored_file.py index f39d5936..dc48dec8 100644 --- a/imgee/models/stored_file.py +++ b/imgee/models/stored_file.py @@ -59,6 +59,4 @@ def extn(self): return guess_extension(self.mimetype, self.orig_extn) or '' def is_queued_for_deletion(self): - if imgee.app.config.get('CELERY_ALWAYS_EAGER'): - return False return imgee.registry.is_queued_for_deletion(self.name+self.extn) diff --git a/imgee/storage.py b/imgee/storage.py index e12d1db9..750470b5 100644 --- a/imgee/storage.py +++ b/imgee/storage.py @@ -1,41 +1,49 @@ # -*- coding: utf-8 -*- -import time -import os.path -from subprocess import check_call, CalledProcessError +from datetime import datetime, timedelta from glob import glob +import os.path import re -from celery.result import AsyncResult +from subprocess import check_call, CalledProcessError +import time + +import redis from sqlalchemy import or_ from werkzeug import secure_filename import imgee -from imgee import app, celery from imgee.models import db, Thumbnail, StoredFile -from imgee.async import queueit, get_taskid, BaseTask from imgee.utils import (newid, guess_extension, get_file_type, path_for, get_s3_folder, get_s3_bucket, - download_frm_s3, get_width_height, ALLOWED_MIMETYPES) + download_frm_s3, get_width_height, ALLOWED_MIMETYPES, + exists_in_s3, get_no_previews_url, THUMBNAIL_COMMANDS) # -- functions used in views -- def get_resized_image(img, size, is_thumbnail=False): """ - Check if `img` is available with `size` if not make a one. Return the name of it. + Check if `img` is available with `size` if not make one. Return the name of it. """ + registry = imgee.registry img_name = img.name size_t = parse_size(size) if (size_t and size_t[0] != img.width and size_t[1] != img.height) or ('thumb_extn' in ALLOWED_MIMETYPES[img.mimetype] and ALLOWED_MIMETYPES[img.mimetype]['thumb_extn'] != img.extn): w_or_h = or_(Thumbnail.width == size_t[0], Thumbnail.height == size_t[1]) scaled = Thumbnail.query.filter(w_or_h, Thumbnail.stored_file == img).first() - if scaled: + if scaled and exists_in_s3(scaled): img_name = scaled.name else: size = get_fitting_size((img.width, img.height), size_t) resized_filename = get_resized_filename(img, size) - job = queueit('resize_and_save', img, size, is_thumbnail=is_thumbnail, taskid=resized_filename) - return job + registry = imgee.registry + registry.remove_all() + if resized_filename in registry: + return get_no_previews_url(size) # this doesn't work. Needs test. + else: + registry.add(resized_filename) + img_name = resize_and_save(img, size, is_thumbnail=is_thumbnail) + registry.remove(resized_filename) return img_name @@ -45,7 +53,7 @@ def save(fp, profile, title=None): """ id_ = newid() title = title or secure_filename(fp.filename) - content_type = get_file_type(fp) + content_type = get_file_type(fp, fp.filename) name, extn = os.path.splitext(fp.filename) extn = guess_extension(content_type, extn) img_name = "%s%s" % (id_, extn) @@ -56,8 +64,8 @@ def save(fp, profile, title=None): stored_file = save_img_in_db(name=id_, title=title, local_path=local_path, profile=profile, mimetype=content_type, orig_extn=extn) - job = queueit('save_on_s3', img_name, content_type=content_type, taskid=img_name) - return title, job, stored_file + s3resp = save_on_s3(img_name, content_type=content_type) + return title, stored_file # -- actual saving of image/thumbnail and data in the db and on S3. @@ -82,13 +90,13 @@ def save_tn_in_db(img, tn_name, (tn_w, tn_h)): Save thumbnail info in db. """ name, extn = os.path.splitext(tn_name) - tn = Thumbnail(name=name, width=tn_w, height=tn_h, stored_file=img) - db.session.add(tn) - db.session.commit() + if Thumbnail.query.filter(Thumbnail.name == name).count() == 0: + tn = Thumbnail(name=name, width=tn_w, height=tn_h, stored_file=img) + db.session.add(tn) + db.session.commit() return name -@celery.task(name='imgee.storage.s3-upload', base=BaseTask) def save_on_s3(filename, remotename='', content_type='', bucket='', folder=''): """ Save contents from file named `filename` to `remotename` on S3. @@ -101,13 +109,13 @@ def save_on_s3(filename, remotename='', content_type='', bucket='', folder=''): k = b.new_key(folder+filename) headers = { 'Cache-Control': 'max-age=31536000', # 60*60*24*365 - 'Content-Type': get_file_type(fp), + 'Content-Type': get_file_type(fp, filename), + 'Expires': datetime.now() + timedelta(days=365) } k.set_contents_from_file(fp, policy='public-read', headers=headers) return filename - # -- size calculations -- def parse_size(size): @@ -150,7 +158,11 @@ def get_fitting_size((orig_w, orig_h), size): >>> get_fitting_size((200, 500), (400, 600)) [240, 600] """ - if size[0] == 0 and size[1] == 0: + if orig_w == 0 or orig_h == 0: + # this is either a cdr file or a zero width file + # just go with target size + w, h = size + elif size[0] == 0 and size[1] == 0 and orig_w > 0 and orig_h > 0: w, h = orig_w, orig_h elif size[0] == 0: w, h = orig_w*size[1]/float(orig_h), size[1] @@ -186,7 +198,6 @@ def get_resized_filename(img, size): return name -@celery.task(name='imgee.storage.resize-and-s3-upload', base=BaseTask) def resize_and_save(img, size, is_thumbnail=False): """ Get the original image from local disk cache, download it from S3 if it misses. @@ -221,30 +232,17 @@ def resize_img(src, dest, size, mimetype, format, is_thumbnail): if not size: return src - processed = False - - if 'processor' in ALLOWED_MIMETYPES[mimetype]: - if ALLOWED_MIMETYPES[mimetype]['processor'] == 'rsvg-convert': - try: - check_call('rsvg-convert --width=%s --height=%s --keep-aspect-ratio=TRUE --format=%s %s > %s' - % (size[0], size[1], format, src, dest), shell=True) - except CalledProcessError as e: - return False - processed = True - if not processed: - try: - check_call('convert -quiet -thumbnail %sx%s %s -colorspace sRGB %s' % (size[0], size[1], src, dest), shell=True) - except CalledProcessError: - return False - - # if is_thumbnail: - # # and crop the rest, keeping the center. - # w, h = resized.size - # tw, th = map(int, app.config.get('THUMBNAIL_SIZE').split('x')) - # left, top = int((w-tw)/2), int((h-th)/2) - # resized = resized.crop((left, top, left+tw, top+th)) - - return True + # get processor value, if none specified, use convert + processor = ALLOWED_MIMETYPES[mimetype].get('processor', 'convert') + command = THUMBNAIL_COMMANDS.get(processor) + prepared_command = command.format(width=size[0], height=size[1], format=format, src=src, dest=dest) + print(prepared_command) + try: + check_call(prepared_command, shell=True) + return True + except CalledProcessError as e: + print(e) + return False def clean_local_cache(expiry=24): @@ -263,31 +261,11 @@ def clean_local_cache(expiry=24): return n -def wait_for_asynctasks(stored_file): - registry = imgee.registry - - if not registry.connection: - return - - # wait for upload to be complete, if any. - taskid = get_taskid('save_on_s3', stored_file.name+stored_file.extn) - if taskid in registry: - AsyncResult(taskid).get() - - # wait for all resizes to be complete, if any. - s = get_taskid('resize_and_save', stored_file.name) - for taskid in registry.keys_starting_with(s): - AsyncResult(taskid).get() - - -@celery.task(name='imgee.storage.delete', base=BaseTask) def delete(stored_file): """ Delete all the thumbnails and images associated with a file, from local cache and S3. Wait for the upload/resize to complete if queued for the same image. """ - if not app.config.get('CELERY_ALWAYS_EAGER'): - wait_for_asynctasks(stored_file) # remove locally cache_path = app.config.get('UPLOADED_FILES_DEST') diff --git a/imgee/templates/profile.html b/imgee/templates/profile.html index ae98a1a4..bd250a77 100644 --- a/imgee/templates/profile.html +++ b/imgee/templates/profile.html @@ -7,11 +7,12 @@ {{ title_form.hidden_tag() }}
+ {% endif %}
-{% endblock %} +{% endblock %} {% block maincontent %}
@@ -44,7 +46,7 @@

@@ -95,14 +97,14 @@

-
+
- +
- -
+

- {% endfor %} + {% endfor %}
- + @@ -167,7 +169,7 @@

$(function(){ $('.dropzone').css('min-height', '30px'); - }); + });