Skip to content

Commit

Permalink
Merge pull request #267 from datamade/patch/hcg/refresh-pic-select
Browse files Browse the repository at this point in the history
Expand refresh_pic to capture documents associated with future events
  • Loading branch information
hancush authored Jun 2, 2020
2 parents a3ae44d + 6b87651 commit 3dc7aa3
Showing 1 changed file with 31 additions and 6 deletions.
37 changes: 31 additions & 6 deletions councilmatic_core/management/commands/refresh_pic.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@
from django.core.exceptions import ImproperlyConfigured
from django.core.management.base import BaseCommand
from django.conf import settings
from django.db.models import Q
import pytz

from opencivicdata.legislative.models import BillDocumentLink, EventDocumentLink
from opencivicdata.legislative.models import BillDocumentLink, EventDocumentLink, \
EventRelatedEntity


for configuration in ['AWS_KEY','AWS_SECRET']:
Expand Down Expand Up @@ -41,15 +43,38 @@ def handle(self, *args, **options):
logger.info(("Removed {} document(s) from the councilmatic-document-cache").format(len(aws_keys)))

def _get_urls(self):
    '''
    Return an iterable of the URLs of the bill and event documents that
    are most likely to have changed.

    A document URL is included when:

    - it belongs to a bill that has at least one version and was either
      updated within the past hour or appears on the agenda of an event
      starting after the one-hour-ago cutoff, or
    - it belongs to an event that was updated within the past hour or
      that starts after the one-hour-ago cutoff.

    Including documents tied to upcoming events is a workaround for a
    known issue where making changes to data in Legistar (DataMade's
    source data system) does not always update the timestamps that tell
    us to rescrape entities, so the updated timestamps in our database
    cannot be relied on alone.

    Returns an itertools.chain over two lazy querysets of URL strings
    (bill document URLs first, then event document URLs).
    '''
    app_timezone = pytz.timezone(settings.TIME_ZONE)

    # Ask for the current time directly in the app timezone rather than
    # localizing a naive datetime.now(): the naive value reflects the
    # process's local timezone, which is only correct when that happens
    # to match settings.TIME_ZONE, and is ambiguous during a DST fold.
    one_hour_ago = datetime.datetime.now(app_timezone) - datetime.timedelta(hours=1)

    has_versions = Q(document__bill__versions__isnull=False)
    recently_updated = Q(document__bill__updated_at__gte=one_hour_ago)

    # IDs of bills attached to agenda items of events starting after the
    # cutoff. Left as an unevaluated queryset so it can be used as a
    # subquery in the __in lookup below.
    # NOTE(review): start_date is compared against a datetime here;
    # confirm the field type on the Event model supports this comparison.
    bills_on_upcoming_agendas = EventRelatedEntity.objects.filter(
        bill__isnull=False,
        agenda_item__event__start_date__gt=one_hour_ago
    ).values_list('bill__id')

    upcoming = Q(document__bill__id__in=bills_on_upcoming_agendas)

    bill_docs = BillDocumentLink.objects.filter(
        has_versions & (recently_updated | upcoming)
    ).values_list('url', flat=True)

    event_docs = EventDocumentLink.objects.filter(
        Q(document__event__updated_at__gte=one_hour_ago) | Q(document__event__start_date__gt=one_hour_ago)
    ).values_list('url', flat=True)

    return itertools.chain(bill_docs, event_docs)

Expand Down

0 comments on commit 3dc7aa3

Please sign in to comment.