diff --git a/.gitattributes b/.gitattributes
index 19ba67b5d3..b03edb6003 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -9,7 +9,7 @@
 
 *.conf text eol=lf
 *.config text eol=lf
-*.cpanfile text eol=lf
+cpanfile text eol=lf
 *.css text eol=lf
 *.csv text eol=lf
 *.enabled_plugins text eol=lf
diff --git a/.github/free-up-disk-space.sh b/.github/free-up-disk-space.sh
index b0bbd19e09..acc6c85de8 100755
--- a/.github/free-up-disk-space.sh
+++ b/.github/free-up-disk-space.sh
@@ -25,6 +25,11 @@ sudo rm -f /swapfile
 echo "Cleaning APT cache..."
 sudo apt clean
 
+echo "Removing some directories..."
+sudo rm -rf /usr/local/lib/android/
+sudo rm -rf /usr/local/lib/node_modules/
+sudo rm -rf /usr/local/share/chromium/
+
 echo "Removing docker images..."
 docker rmi $(docker image ls -aq)
 
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index d40605f77f..417bddc68d 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -180,9 +180,11 @@ jobs:
           MC_DOWNLOADS_AMAZON_S3_SECRET_ACCESS_KEY: ${{ secrets.MC_DOWNLOADS_AMAZON_S3_SECRET_ACCESS_KEY }}
           MC_FACEBOOK_APP_ID: ${{ secrets.MC_FACEBOOK_APP_ID }}
           MC_FACEBOOK_APP_SECRET: ${{ secrets.MC_FACEBOOK_APP_SECRET }}
-          MC_PODCAST_FETCH_EPISODE_BUCKET_NAME: ${{ secrets.MC_PODCAST_FETCH_EPISODE_BUCKET_NAME }}
+          MC_PODCAST_AUTH_JSON_BASE64: ${{ secrets.MC_PODCAST_AUTH_JSON_BASE64 }}
+          MC_PODCAST_RAW_ENCLOSURES_BUCKET_NAME: ${{ secrets.MC_PODCAST_RAW_ENCLOSURES_BUCKET_NAME }}
+          MC_PODCAST_TRANSCODED_EPISODES_BUCKET_NAME: ${{ secrets.MC_PODCAST_TRANSCODED_EPISODES_BUCKET_NAME }}
+          MC_PODCAST_TRANSCRIPTS_BUCKET_NAME: ${{ secrets.MC_PODCAST_TRANSCRIPTS_BUCKET_NAME }}
           MC_PODCAST_FETCH_TRANSCRIPT_RUN_COSTLY_TEST: ${{ secrets.MC_PODCAST_FETCH_TRANSCRIPT_RUN_COSTLY_TEST }}
-          MC_PODCAST_GC_AUTH_JSON_BASE64: ${{ secrets.MC_PODCAST_GC_AUTH_JSON_BASE64 }}
           MC_TWITTER_ACCESS_TOKEN: ${{ secrets.MC_TWITTER_ACCESS_TOKEN }}
           MC_TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.MC_TWITTER_ACCESS_TOKEN_SECRET }}
           MC_TWITTER_CONSUMER_KEY: ${{ secrets.MC_TWITTER_CONSUMER_KEY }}
diff --git a/.gitignore b/.gitignore
index dba8fe1ceb..99845389ff 100644
--- a/.gitignore
+++ b/.gitignore
@@ -59,7 +59,9 @@ coverage.json
 **/.idea/**/dataSources/
 **/.idea/**/dataSources.ids
 **/.idea/**/dataSources.local.xml
-**/.idea/**/sqlDataSources.xml
+
+# Not ignoring **/.idea/**/sqlDataSources.xml as it points to ./.idea/mediawords.sql
+
 **/.idea/**/dynamic.xml
 **/.idea/**/uiDesigner.xml
 **/.idea/**/dbnavigator.xml
diff --git a/.gitmodules b/.gitmodules
index 70ec6e112b..a6f576b719 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -25,12 +25,18 @@
 [submodule "dev/quieter-docker-compose"]
 	path = dev/quieter-docker-compose
 	url = https://github.com/mediacloud/docker-compose-just-quieter.git
-[submodule "apps/podcast-fetch-episode/tests/data/media-samples"]
-	path = apps/podcast-fetch-episode/tests/data/media-samples
-	url = https://github.com/mediacloud/podcast-media-samples.git
-[submodule "apps/podcast-fetch-transcript/tests/data/media-samples"]
-	path = apps/podcast-fetch-transcript/tests/data/media-samples
+[submodule "apps/podcast-transcribe-episode/tests/data/media-samples"]
+	path = apps/podcast-transcribe-episode/tests/data/media-samples
 	url = https://github.com/mediacloud/podcast-media-samples.git
 [submodule "apps/elk-journalbeat/journald-log-sample"]
 	path = apps/elk-journalbeat/journald-log-sample
 	url = https://github.com/mediacloud/journald-log-sample.git
+[submodule "apps/temporal-grafana/dashboards"]
+	path = apps/temporal-grafana/dashboards
+	url = https://github.com/temporalio/dashboards.git
+[submodule "apps/temporal-server/config"]
+	path = apps/temporal-server/config
+	url = https://github.com/mediacloud/backend-temporal-server-config.git
+[submodule "apps/temporal-postgresql/temporal-config"]
+	path = apps/temporal-postgresql/temporal-config
+	url = https://github.com/mediacloud/backend-temporal-server-config.git
diff --git a/apps/base/Dockerfile b/apps/base/Dockerfile
index 6f12c7fdde..7fb5875dd1 100644
--- a/apps/base/Dockerfile
+++ b/apps/base/Dockerfile
@@ -7,7 +7,7 @@
 #
 
 # https://hub.docker.com/_/ubuntu?tab=tags&page=1
-FROM ubuntu:focal-20210119
+FROM ubuntu:focal-20210416
 
 ENV DEBIAN_FRONTEND=noninteractive \
     LANG=en_US.UTF-8 \
@@ -48,6 +48,9 @@ RUN \
     apt-get -y --no-install-recommends install \
         # Quicker container debugging
         bash-completion \
+        # "mail" utility which uses sendmail (provided by msmtp-mta) internally;
+        # some tools like munin-cron use "mail" to send emails
+        bsd-mailx \
         curl \
         htop \
         # apt-key
@@ -56,7 +59,8 @@ RUN \
         iproute2 \
         # Pinging other containers from within Compose environment
         iputils-ping \
-        # Sending mail via sendmail utility through mail-postfix-server
+        # Provides "sendmail" utility which relays email through
+        # "mail-postfix-server" app
         msmtp \
         msmtp-mta \
         # Provides killall among other utilities
@@ -67,8 +71,6 @@ RUN \
         netcat \
         # Some packages insist on logging to syslog
         rsyslog \
-        # "mail" utility (which uses msmtp internally)
-        s-nail \
         # Timezone data, used by many packages
         tzdata \
         # Basic editor for files in container while debugging
@@ -90,6 +92,25 @@ COPY bin/container_memory_limit.sh bin/container_cpu_limit.sh bin/dl_to_stdout.s
 # Copy MSMTP configuration
 COPY conf/msmtprc conf/msmtp-aliases /etc/
 
+# Both "sendmail" and "mail" utilities are important as they're used by various
+# apps (e.g. munin-cron) to send us important email, and those apps aren't
+# particularly vocal when they're unable to send email. So fail the build unless
+# sendmail -> msmtp and mail -> /etc/alternatives/mail -> bsd-mailx symlinks exist.
+RUN \
+    if [ "$(readlink -- "/usr/sbin/sendmail")" != "../bin/msmtp" ]; then \
+        echo "sendmail is not symlinked to msmtp, sending email won't work." && \
+        exit 1; \
+    fi; \
+    if [ "$(readlink -- "/usr/bin/mail")" != "/etc/alternatives/mail" ]; then \
+        echo "mail is not symlinked to /etc/alternatives/mail, sending email won't work." && \
+        exit 1; \
+    fi; \
+    if [ "$(readlink -- "/etc/alternatives/mail")" != "/usr/bin/bsd-mailx" ]; then \
+        echo "/etc/alternatives/mail is not symlinked to /usr/bin/bsd-mailx, sending email won't work." && \
+        exit 1; \
+    fi; \
+    true
+
 # Generate and set locale
 RUN \
     locale-gen en_US en_US.UTF-8 && \
diff --git a/apps/cliff-annotator/Dockerfile b/apps/cliff-annotator/Dockerfile
index 7db0f3ae4d..df2ccad7e5 100644
--- a/apps/cliff-annotator/Dockerfile
+++ b/apps/cliff-annotator/Dockerfile
@@ -27,7 +27,7 @@ RUN \
 # Install Tomcat 7
 RUN \
     mkdir -p /usr/lib/tomcat7/ && \
-    /dl_to_stdout.sh "https://archive.apache.org/dist/tomcat/tomcat-7/v7.0.96/bin/apache-tomcat-7.0.96.tar.gz" | \
+    /dl_to_stdout.sh "https://mediacloud-archive-apache-org.s3.amazonaws.com/apache-tomcat-7.0.96.tar.gz" | \
         tar -zx -C /usr/lib/tomcat7/ --strip 1 && \
     true
 
diff --git a/apps/cliff-fetch-annotation-and-tag/.dockerignore b/apps/cliff-fetch-annotation-and-tag/.dockerignore
index 752414ae9c..9b2c362a80 100644
--- a/apps/cliff-fetch-annotation-and-tag/.dockerignore
+++ b/apps/cliff-fetch-annotation-and-tag/.dockerignore
@@ -89,3 +89,4 @@ sdist
 Temporary Items
 wheels
 _Inline
+
diff --git a/apps/cliff-fetch-annotation-and-tag/.idea/cliff-fetch-annotation-and-tag.iml b/apps/cliff-fetch-annotation-and-tag/.idea/cliff-fetch-annotation-and-tag.iml
index 4aaca228bb..5f8a5e5f93 100644
--- a/apps/cliff-fetch-annotation-and-tag/.idea/cliff-fetch-annotation-and-tag.iml
+++ b/apps/cliff-fetch-annotation-and-tag/.idea/cliff-fetch-annotation-and-tag.iml
@@ -1,8 +1,8 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <module type="PYTHON_MODULE" version="4">
   <component name="NewModuleRootManager">
-    <content url="file://$MODULE_DIR$/../cliff-fetch-annotation-and-tag" />
-    <orderEntry type="jdk" jdkName="Remote Python 3.7.9 Docker Compose (cliff-fetch-annotation-and-tag at [/Users/jamesotoole/mediacloud/backend/apps/cliff-fetch-annotation-and-tag/docker-compose.tests.yml])" jdkType="Python SDK" />
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="jdk" jdkName="Remote Python 3.8.5 Docker Compose (cliff-fetch-annotation-and-tag at [/home/pypt/m/apps/cliff-fetch-annotation-and-tag/docker-compose.tests.yml])" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PackageRequirementsSettings">
diff --git a/apps/cliff-fetch-annotation-and-tag/.idea/mediawords.sql b/apps/cliff-fetch-annotation-and-tag/.idea/mediawords.sql
new file mode 120000
index 0000000000..08fc9a64b9
--- /dev/null
+++ b/apps/cliff-fetch-annotation-and-tag/.idea/mediawords.sql
@@ -0,0 +1 @@
+../../postgresql-server/schema/mediawords.sql
\ No newline at end of file
diff --git a/apps/cliff-fetch-annotation-and-tag/.idea/misc.xml b/apps/cliff-fetch-annotation-and-tag/.idea/misc.xml
index 5914dad53f..907e6bae2a 100644
--- a/apps/cliff-fetch-annotation-and-tag/.idea/misc.xml
+++ b/apps/cliff-fetch-annotation-and-tag/.idea/misc.xml
@@ -1,4 +1,4 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
-  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.7.9 Docker Compose (cliff-fetch-annotation-and-tag at [/Users/jamesotoole/mediacloud/backend/apps/cliff-fetch-annotation-and-tag/docker-compose.tests.yml])" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.8.5 Docker Compose (cliff-fetch-annotation-and-tag at [/home/pypt/m/apps/cliff-fetch-annotation-and-tag/docker-compose.tests.yml])" project-jdk-type="Python SDK" />
 </project>
\ No newline at end of file
diff --git a/apps/cliff-fetch-annotation-and-tag/.idea/sqlDataSources.xml b/apps/cliff-fetch-annotation-and-tag/.idea/sqlDataSources.xml
new file mode 100644
index 0000000000..3228ec2234
--- /dev/null
+++ b/apps/cliff-fetch-annotation-and-tag/.idea/sqlDataSources.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="SqlDataSourceStorage">
+    <option name="dataSources">
+      <list>
+        <State>
+          <option name="id" value="43ddd8f7-90d1-486e-8244-61909c65e684" />
+          <option name="name" value="DDL data source" />
+          <option name="urls">
+            <array>
+              <option value="file://$PROJECT_DIR$/.idea/mediawords.sql" />
+            </array>
+          </option>
+        </State>
+      </list>
+    </option>
+  </component>
+</project>
\ No newline at end of file
diff --git a/apps/cliff-fetch-annotation-and-tag/.idea/sqldialects.xml b/apps/cliff-fetch-annotation-and-tag/.idea/sqldialects.xml
index 790b3f37f8..92fefa2e78 100644
--- a/apps/cliff-fetch-annotation-and-tag/.idea/sqldialects.xml
+++ b/apps/cliff-fetch-annotation-and-tag/.idea/sqldialects.xml
@@ -1,6 +1,7 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
   <component name="SqlDialectMappings">
+    <file url="file://$PROJECT_DIR$/.idea/mediawords.sql" dialect="PostgreSQL" />
     <file url="file://$PROJECT_DIR$/../postgresql-server/schema/mediawords.sql" dialect="PostgreSQL" />
     <file url="PROJECT" dialect="PostgreSQL" />
   </component>
diff --git a/apps/cliff-fetch-annotation-and-tag/docker-compose.tests.yml b/apps/cliff-fetch-annotation-and-tag/docker-compose.tests.yml
index 5b589005cd..a97835a29f 100644
--- a/apps/cliff-fetch-annotation-and-tag/docker-compose.tests.yml
+++ b/apps/cliff-fetch-annotation-and-tag/docker-compose.tests.yml
@@ -54,5 +54,5 @@ services:
               source: ./../postgresql-server/schema/
               target: /opt/mediacloud/schema/
             - type: bind
-              source: ./../postgresql-server/conf/
-              target: /etc/postgresql/11/main/
+              source: ./../postgresql-base/conf/
+              target: /etc/postgresql/13/main/
diff --git a/apps/common/.idea/common.iml b/apps/common/.idea/common.iml
index 492339729c..bb46b8eafc 100644
--- a/apps/common/.idea/common.iml
+++ b/apps/common/.idea/common.iml
@@ -2,9 +2,12 @@
 <module type="PYTHON_MODULE" version="4">
   <component name="NewModuleRootManager">
     <content url="file://$MODULE_DIR$" />
-    <orderEntry type="jdk" jdkName="common" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Remote Python 3.8.5 Docker Compose (common at [/home/pypt/m/apps/common/docker-compose.tests.yml])" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
+  <component name="PackageRequirementsSettings">
+    <option name="requirementsPath" value="$MODULE_DIR$/src/requirements.txt" />
+  </component>
   <component name="TemplatesService">
     <option name="TEMPLATE_CONFIGURATION" value="Jinja2" />
     <option name="TEMPLATE_FOLDERS">
@@ -14,7 +17,6 @@
     </option>
   </component>
   <component name="TestRunnerService">
-    <option name="projectConfiguration" value="pytest" />
     <option name="PROJECT_TEST_RUNNER" value="pytest" />
   </component>
 </module>
\ No newline at end of file
diff --git a/apps/common/.idea/inspectionProfiles/Project_Default.xml b/apps/common/.idea/inspectionProfiles/Project_Default.xml
index 76ebfe820e..d3d52a9b48 100644
--- a/apps/common/.idea/inspectionProfiles/Project_Default.xml
+++ b/apps/common/.idea/inspectionProfiles/Project_Default.xml
@@ -1,6 +1,7 @@
 <component name="InspectionProjectProfileManager">
   <profile version="1.0">
     <option name="myName" value="Project Default" />
+    <inspection_tool class="HttpUrlsUsage" enabled="false" level="WEAK WARNING" enabled_by_default="false" />
     <inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
       <option name="ignoredPackages">
         <value>
diff --git a/apps/common/.idea/mediawords.sql b/apps/common/.idea/mediawords.sql
new file mode 120000
index 0000000000..08fc9a64b9
--- /dev/null
+++ b/apps/common/.idea/mediawords.sql
@@ -0,0 +1 @@
+../../postgresql-server/schema/mediawords.sql
\ No newline at end of file
diff --git a/apps/common/.idea/misc.xml b/apps/common/.idea/misc.xml
index 5e60c007a8..c53ab64922 100644
--- a/apps/common/.idea/misc.xml
+++ b/apps/common/.idea/misc.xml
@@ -6,5 +6,5 @@
   <component name="NodePackageJsonFileManager">
     <packageJsonPaths />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="common" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.8.5 Docker Compose (common at [/home/pypt/m/apps/common/docker-compose.tests.yml])" project-jdk-type="Python SDK" />
 </project>
\ No newline at end of file
diff --git a/apps/common/.idea/sqlDataSources.xml b/apps/common/.idea/sqlDataSources.xml
new file mode 100644
index 0000000000..2cf8b2da55
--- /dev/null
+++ b/apps/common/.idea/sqlDataSources.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="SqlDataSourceStorage">
+    <option name="dataSources">
+      <list>
+        <State>
+          <option name="id" value="0e4873e9-74b1-40a5-89cb-ea805c1aa6c3" />
+          <option name="name" value="DDL data source" />
+          <option name="urls">
+            <array>
+              <option value="file://$PROJECT_DIR$/.idea/mediawords.sql" />
+            </array>
+          </option>
+        </State>
+      </list>
+    </option>
+  </component>
+</project>
\ No newline at end of file
diff --git a/apps/common/.idea/sqldialects.xml b/apps/common/.idea/sqldialects.xml
index 790b3f37f8..92fefa2e78 100644
--- a/apps/common/.idea/sqldialects.xml
+++ b/apps/common/.idea/sqldialects.xml
@@ -1,6 +1,7 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
   <component name="SqlDialectMappings">
+    <file url="file://$PROJECT_DIR$/.idea/mediawords.sql" dialect="PostgreSQL" />
     <file url="file://$PROJECT_DIR$/../postgresql-server/schema/mediawords.sql" dialect="PostgreSQL" />
     <file url="PROJECT" dialect="PostgreSQL" />
   </component>
diff --git a/apps/common/docker-compose.tests.yml b/apps/common/docker-compose.tests.yml
index f822f335f5..17126383ee 100644
--- a/apps/common/docker-compose.tests.yml
+++ b/apps/common/docker-compose.tests.yml
@@ -13,6 +13,12 @@ services:
             MC_DOWNLOADS_AMAZON_S3_DIRECTORY_NAME: "${MC_DOWNLOADS_AMAZON_S3_DIRECTORY_NAME}"
             MC_PUBLIC_STORE_TYPE: "postgresql"
             MC_PUBLIC_STORE_SALT: "foo"
+            # Email address to point to in List-Unsubscribe email header.
+            # Technically we don't have a straightforward "unsubscribe" endpoint, but our
+            # emails are more likely to be marked as spam if we don't have such a header,
+            # so we make the email subject "Delete account and unsubscribe" in
+            # mediawords/util/config/common.py
+            MC_EMAIL_UNSUBSCRIBE: "support@example.com"
         volumes:
             - type: bind
               source: ./src/
@@ -91,8 +97,8 @@ services:
               source: ./../postgresql-server/schema/
               target: /opt/mediacloud/schema/
             - type: bind
-              source: ./../postgresql-server/conf/
-              target: /etc/postgresql/11/main/
+              source: ./../postgresql-base/conf/
+              target: /etc/postgresql/13/main/
 
     solr-shard-01:
         image: gcr.io/mcback/solr-shard:latest
diff --git a/apps/common/src/perl/MediaWords/Languages/Language/PythonWrapper.pm b/apps/common/src/perl/MediaWords/Languages/Language/PythonWrapper.pm
index 384318b8e6..67dad9a001 100644
--- a/apps/common/src/perl/MediaWords/Languages/Language/PythonWrapper.pm
+++ b/apps/common/src/perl/MediaWords/Languages/Language/PythonWrapper.pm
@@ -71,6 +71,15 @@ sub stop_words_map($)
     return $stop_words_map;
 }
 
+# FIXME remove once stopword comparison is over
+sub stop_words_old_map($)
+{
+    my ( $self ) = @_;
+    # Ask the wrapped Python language object for its old stop word map
+    my $old_stop_words = $self->{ _python_lang }->stop_words_old_map();
+    return $old_stop_words;
+}
+
 sub stem_words($$)
 {
     my ( $self, $words ) = @_;
diff --git a/apps/common/src/python/mediawords/db/__init__.py b/apps/common/src/python/mediawords/db/__init__.py
index f03065ed5a..0b98f8fb16 100644
--- a/apps/common/src/python/mediawords/db/__init__.py
+++ b/apps/common/src/python/mediawords/db/__init__.py
@@ -1,18 +1,31 @@
 import time
+from typing import Optional
 
 from mediawords.db.handler import DatabaseHandler
-from mediawords.util.config.common import CommonConfig
+from mediawords.util.config.common import CommonConfig, DatabaseConfig, ConnectRetriesConfig
 from mediawords.util.log import create_logger
-from mediawords.util.perl import decode_object_from_bytes_if_needed
 from mediawords.util.process import fatal_error
 
 log = create_logger(__name__)
 
 
-def connect_to_db() -> DatabaseHandler:
-    """Connect to PostgreSQL."""
+class McConnectToDBError(Exception):
+    """
+    Raised when connect_to_db() is out of retries and retries' fatal_error_on_failure is False.
+    """
+
+
+def connect_to_db(db_config: Optional[DatabaseConfig] = None) -> DatabaseHandler:
+    """
+    Connect to PostgreSQL (via PgBouncer).
+
+    :param db_config: Optional DatabaseConfig parameter to specify connection retry parameters.
+    :return: DatabaseHandler object.
+    """
+
+    if not db_config:
+        db_config = CommonConfig.database()
 
-    db_config = CommonConfig.database()
     retries_config = db_config.retries()
 
     assert retries_config.max_attempts() > 0, "max_tries can't be negative."
@@ -57,12 +70,34 @@ def connect_to_db() -> DatabaseHandler:
             else:
                 log.info("Out of retries, giving up and exiting...")
 
-                # Don't throw any exceptions because they might be caught by
-                # the try-catch block, and so the caller will just assume that
-                # there was something wrong with the input data and proceed
-                # with processing next item in the job queue (e.g. the next
-                # story). Instead, just quit and wait for someone to restart
-                # the whole app that requires database access.
-                fatal_error(error_message)
+                if retries_config.fatal_error_on_failure():
+                    # Don't throw any exceptions because they might be caught by
+                    # the try-catch block, and so the caller will just assume that
+                    # there was something wrong with the input data and proceed
+                    # with processing next item in the job queue (e.g. the next
+                    # story). Instead, just quit and wait for someone to restart
+                    # the whole app that requires database access.
+                    fatal_error(error_message)
+                else:
+                    raise McConnectToDBError(error_message)
 
     return db
+
+
+def connect_to_db_or_raise() -> DatabaseHandler:
+    """
+    Connect to PostgreSQL with a single attempt, raising instead of exiting on failure.
+
+    Calls connect_to_db() with a configuration that allows only one connection
+    attempt and turns off fatal_error(), so a failed attempt ends up raising
+    McConnectToDBError instead. Meant for callers (e.g. Temporal workflows) that
+    do their own retrying.
+
+    :return: DatabaseHandler object.
+    """
+    no_retries = ConnectRetriesConfig(
+        max_attempts=1,
+        fatal_error_on_failure=False,
+    )
+    no_retry_db_config = DatabaseConfig(retries=no_retries)
+    return connect_to_db(db_config=no_retry_db_config)
diff --git a/apps/common/src/python/mediawords/job/__init__.py b/apps/common/src/python/mediawords/job/__init__.py
index 7a110d32e7..428599bc03 100644
--- a/apps/common/src/python/mediawords/job/__init__.py
+++ b/apps/common/src/python/mediawords/job/__init__.py
@@ -10,7 +10,7 @@
 from mediawords.db import connect_to_db, DatabaseHandler
 from mediawords.db.locks import get_session_lock, release_session_lock
 from mediawords.job.states import STATE_QUEUED, STATE_RUNNING, STATE_COMPLETED, STATE_ERROR
-from mediawords.util.config.common import CommonConfig
+from mediawords.util.config.common import CommonConfig, RabbitMQConfig
 from mediawords.util.log import create_logger
 from mediawords.util.parse_json import encode_json, decode_json
 from mediawords.util.perl import decode_object_from_bytes_if_needed
@@ -382,7 +382,7 @@ class JobBroker(object):
         '__queue_name',
     ]
 
-    def __init__(self, queue_name: str):
+    def __init__(self, queue_name: str, rabbitmq_config: Optional[RabbitMQConfig] = None):
         """
         Create job broker object.
 
@@ -397,7 +397,9 @@ def __init__(self, queue_name: str):
 
         config = CommonConfig()
 
-        rabbitmq_config = config.rabbitmq()
+        if not rabbitmq_config:
+            rabbitmq_config = config.rabbitmq()
+
         broker_uri = 'amqp://{username}:{password}@{hostname}:{port}/{vhost}'.format(
             username=rabbitmq_config.username(),
             password=rabbitmq_config.password(),
@@ -440,6 +442,19 @@ def __init__(self, queue_name: str):
 
         self.__app.conf.worker_max_tasks_per_child = 1000
 
+        # Publish retries are Celery settings, so they have to be set on app.conf
+        retries_config = rabbitmq_config.retries()
+        if retries_config:
+            self.__app.conf.task_publish_retry = True
+            self.__app.conf.task_publish_retry_policy = {
+                'max_retries': retries_config.max_retries(),
+                'interval_start': retries_config.interval_start(),
+                'interval_step': retries_config.interval_step(),
+                'interval_max': retries_config.interval_max(),
+            }
+        else:
+            self.__app.conf.task_publish_retry = False
+
         queue = Queue(
             name=queue_name,
             exchange=Exchange(queue_name),
diff --git a/apps/common/src/python/mediawords/languages/__init__.py b/apps/common/src/python/mediawords/languages/__init__.py
index b985445af4..efbfac1fcb 100644
--- a/apps/common/src/python/mediawords/languages/__init__.py
+++ b/apps/common/src/python/mediawords/languages/__init__.py
@@ -50,7 +50,7 @@ class AbstractLanguage(object, metaclass=abc.ABCMeta):
     @abc.abstractmethod
     def language_code() -> str:
         """Return ISO 639-1 language code, e.g. 'en'."""
-        raise NotImplemented("Abstract method.")
+        raise NotImplementedError("Abstract method.")
 
     @staticmethod
     @abc.abstractmethod
@@ -63,7 +63,7 @@ def sample_sentence() -> str:
         * Wikipedia
         * cld2-cffi's unit test: https://github.com/GregBowyer/cld2-cffi/blob/master/tests/test_cld.py
         """
-        raise NotImplemented("Abstract method.")
+        raise NotImplementedError("Abstract method.")
 
     # MC_REWRITE_TO_PYTHON: use set after rewrite to Python
     @abc.abstractmethod
@@ -81,6 +81,12 @@ def stop_words_map(self) -> Dict[str, bool]:
         """
         raise NotImplementedError("Abstract method.")
 
+    # FIXME remove once stopword comparison is over
+    @abc.abstractmethod
+    def stop_words_old_map(self) -> Dict[str, bool]:
+        """Return map of old stop words (stop word as key, True as value); kept only for stopword comparison."""
+        raise NotImplementedError("Abstract method.")
+
     @abc.abstractmethod
     def stem_words(self, words: List[str]) -> List[str]:
         """Return list of stems for a list of words.
@@ -283,6 +289,9 @@ def __init__(self):
         # Stop words map (lazy initialized)
         self.__stop_words_map = None
 
+        # FIXME remove once stopword comparison is over
+        self.__stop_words_old_map = None
+
     def stop_words_map(self) -> Dict[str, bool]:
         """Return stop word map read from a file."""
         if self.__stop_words_map is None:
@@ -312,3 +321,33 @@ def stop_words_map(self) -> Dict[str, bool]:
             self.__stop_words_map = stop_words
 
         return self.__stop_words_map
+
+    # FIXME remove once stopword comparison is over
+    def stop_words_old_map(self) -> Dict[str, bool]:
+        """
+        Return stop word map read from '<language code>_stop_words_old.txt'.
+
+        The file is looked up in the '<language code>/' directory next to this
+        module and parsed lazily on first call; later calls return the cached map.
+        :return: Dictionary with stop words as keys and True as values.
+        :raise McLanguageException: if the stop words file does not exist.
+        """
+        if self.__stop_words_old_map is None:
+            # os.path.join() always returns a string, so no None check is needed
+            stop_words_path = os.path.join(
+                os.path.dirname(os.path.abspath(__file__)),
+                self.language_code(),
+                '%s_stop_words_old.txt' % self.language_code(),
+            )
+            if not os.path.isfile(stop_words_path):
+                raise McLanguageException("Stop words file does not exist at path '%s'." % stop_words_path)
+
+            stop_words = dict()
+            with open(stop_words_path, 'r', encoding='utf-8') as f:
+                for stop_word in f:
+                    # Drop "# ..." comments and surrounding whitespace
+                    stop_word = re.sub(r'\s*?#.*?$', '', stop_word).strip()
+                    if len(stop_word) > 0:
+                        stop_words[stop_word] = True
+            self.__stop_words_old_map = stop_words
+        return self.__stop_words_old_map
diff --git a/apps/common/src/python/mediawords/languages/ca/ca_stop_words.txt b/apps/common/src/python/mediawords/languages/ca/ca_stop_words.txt
index eaf6168385..40abbeb608 100644
--- a/apps/common/src/python/mediawords/languages/ca/ca_stop_words.txt
+++ b/apps/common/src/python/mediawords/languages/ca/ca_stop_words.txt
@@ -1,10 +1,8 @@
-#
-# This is a stop word list for the Catalan language.
-#
-# Sources:
-#     https://raw.githubusercontent.com/stopwords-iso/stopwords-ca/master/stopwords-ca.txt
+# Sources:
 #     http://latel.upf.edu/morgana/altres/pub/ca_stop.htm
+#     https://raw.githubusercontent.com/stopwords-iso/stopwords-ca/master/stopwords-ca.txt
 #     https://www.ranks.nl/stopwords/catalan
+# (Lightly edited to remove words in the original lists that are actually meaningful)
 #
 
 a
@@ -12,10 +10,12 @@ abans
 abans-d'ahir
 abintestat
 ací
-adesiara
+açò
 adàgio
 adés
+adesiara
 adéu
+àdhuc
 ah
 ahir
 ai
@@ -28,13 +28,15 @@ aixà
 així
 això
 al
+alça
 aleshores
+algú
 algun
 alguna
 algunes
 alguns
-algú
 alhora
+àlies
 allà
 allèn
 allí
@@ -48,13 +50,12 @@ altres
 altresí
 altri
 al·legro
-alça
 amargament
 amb
-ambdues
 ambdós
-amunt
+ambdues
 amén
+amunt
 anar
 anc
 andante
@@ -73,11 +74,11 @@ aquell
 aquella
 aquelles
 aquells
+aquèn
 aquest
 aquesta
 aquestes
 aquests
-aquèn
 aquí
 ara
 arran
@@ -92,7 +93,6 @@ avall
 avant
 aviat
 avui
-açò
 bah
 baix
 baldament
@@ -100,17 +100,18 @@ ballmanetes
 banzim-banzam
 bastant
 bastants
+bé
 ben
 bis
 bitllo-bitllo
 bo
-bé
 ca
+ça
 cada
+cadascú
 cadascuna
 cadascunes
 cadascuns
-cadascú
 cal
 cap
 car
@@ -126,10 +127,11 @@ certes
 certs
 cinc
 cinquanta
+cinquè
 cinquena
 cinquenes
 cinquens
-cinquè
+ço
 com
 comsevulla
 consegueixo
@@ -147,10 +149,10 @@ d'un
 d'una
 d'unes
 d'uns
-daixonses
 daixò
-dallonses
+daixonses
 dallò
+dallonses
 dalt
 daltabaix
 damunt
@@ -160,6 +162,7 @@ davall
 davant
 de
 debades
+deçà
 dedins
 defora
 dejorn
@@ -167,12 +170,13 @@ dejús
 del
 dellà
 dels
-dementre
-dempeus
 demà
+dementre
 demés
+dempeus
 des
 des de
+desè
 desena
 desenes
 desens
@@ -180,11 +184,9 @@ després
 dessobre
 dessota
 dessús
-desè
 deu
 devers
 devora
-deçà
 diferents
 dinou
 dins
@@ -217,6 +219,7 @@ emperò
 en
 enans
 enant
+ençà
 encara
 encontinent
 endalt
@@ -224,9 +227,9 @@ endarrera
 endarrere
 endavant
 endebades
-endemig
 endemà
 endemés
+endemig
 endins
 endintre
 enfora
@@ -235,8 +238,8 @@ enguany
 enguanyasses
 enjús
 enlaire
-enlloc
 enllà
+enlloc
 enrera
 enrere
 ens
@@ -250,65 +253,69 @@ entretant
 entrò
 envers
 envides
-environs
 enviró
-ençà
+environs
 ep
 era
 erem
+érem
 eren
 eres
+éreu
 ergo
 es
+és
 escar
 essent
+éssent
 esser
+ésser
 est
 esta
+està
 estada
 estades
 estan
 estant
 estar
+estarà
 estaran
+estaràs
+estaré
 estarem
 estareu
 estaria
+estaríem
 estarien
 estaries
-estarà
-estaràs
-estaré
-estaríem
 estaríeu
+estàs
 estat
 estats
 estava
+estàvem
 estaven
 estaves
+estàveu
 estem
 estes
 esteu
 estic
+estigué
 estiguem
+estiguérem
 estigueren
 estigueres
+estiguéreu
 estigues
+estigués
 estiguessis
 estigueu
 estigui
+estiguí
 estiguin
 estiguis
-estigué
-estiguérem
-estiguéreu
-estigués
-estiguí
 estos
-està
-estàs
-estàvem
-estàveu
 et
 etc
 etcètera
@@ -325,20 +332,20 @@ feu
 fi
 fins
 fora
+fóra
+força
+fórem
 foren
 fores
-força
+fóreu
 fos
+fóssim
 fossin
 fossis
+fóssiu
 fou
 fra
 fui
-fóra
-fórem
-fóreu
-fóssim
-fóssiu
 gaire
 gairebé
 gaires
@@ -347,45 +354,47 @@ girientorn
 gratis
 ha
 hagi
+hàgim
 hagin
 hagis
+hàgiu
 haguda
 hagudes
-hagueren
-hagueres
-haguessin
-haguessis
-hagut
-haguts
 hagué
 haguérem
+hagueren
+hagueres
 haguéreu
 hagués
 haguéssim
+haguessin
+haguessis
 haguéssiu
 haguí
+hagut
+haguts
 hala
 han
 has
+haurà
 hauran
+hauràs
+hauré
 haurem
 haureu
 hauria
+hauríem
 haurien
 hauries
-haurà
-hauràs
-hauré
-hauríem
 hauríeu
 havem
 havent
 haver
 haveu
 havia
+havíem
 havien
 havies
-havíem
 havíeu
 he
 hem
@@ -394,13 +403,12 @@ hi
 ho
 hom
 hui
-hàgim
-hàgiu
 i
+ídem
 igual
 iguals
-inclusive
 inclòs
+inclusive
 ja
 jamai
 jo
@@ -444,6 +452,7 @@ mentre
 mentrestant
 menys
 mes
+més
 meu
 meua
 meues
@@ -466,7 +475,6 @@ molts
 mon
 mons
 mos
-més
 n
 n'he
 n'hi
@@ -479,35 +487,37 @@ nogensmenys
 només
 noranta
 nos
+nós
+nòs
 nosaltres
 nostra
 nostre
 nostres
 nou
+novè
 novena
 novenes
 novens
-novè
 ns
-nòs
-nós
 o
 oh
 oi
 oidà
+òlim
 on
 onsevulga
 onsevulla
 onze
 pas
+pàssim
 pel
 pels
 pengim-penjam
 per
 per que
+però
 perquè
 pertot
-però
 piano
 pla
 poc
@@ -532,7 +542,6 @@ prou
 puc
 puix
 pus
-pàssim
 qual
 quals
 qualsevol
@@ -552,6 +561,7 @@ quarts
 quasi
 quatre
 que
+què
 quelcom
 qui
 quin
@@ -560,7 +570,6 @@ quines
 quins
 quinze
 quisvulla
-què
 ran
 re
 rebé
@@ -596,25 +605,25 @@ sengles
 sens
 sense
 ser
+serà
 seran
+seràs
+seré
 serem
 sereu
 seria
+seríem
 serien
 series
-serà
-seràs
-seré
-seríem
 seríeu
 ses
 set
 setanta
+setè
 setena
 setenes
 setens
 setze
-setè
 seu
 seua
 seues
@@ -622,6 +631,7 @@ seus
 seva
 seves
 si
+sí
 sia
 siau
 sic
@@ -633,13 +643,14 @@ siguin
 siguis
 sinó
 sis
+sisè
 sisena
 sisenes
 sisens
-sisè
 sobre
 sobretot
 soc
+sóc
 sol
 sola
 solament
@@ -647,6 +658,7 @@ soles
 sols
 som
 son
+són
 sons
 sos
 sota
@@ -654,9 +666,6 @@ sots
 sou
 sovint
 suara
-sí
-sóc
-són
 t
 t'ha
 t'han
@@ -712,14 +721,23 @@ u
 uf
 ui
 uix
+últim
+última
+últimes
+últims
 ultra
 un
 una
 unes
+únic
+única
+únics
+úniques
 uns
 up
 upa
 us
+ús
 va
 vagi
 vagin
@@ -728,54 +746,34 @@ vaig
 vair
 vam
 van
+vàreig
+vàrem
 vares
+vàreu
 vas
 vau
 vem
 verbigràcia
 vers
+vés
 vet
 veu
 vint
 vora
 vos
+vós
 vosaltres
+vostè
+vostès
 vostra
 vostre
 vostres
-vostè
-vostès
 vuit
 vuitanta
+vuitè
 vuitena
 vuitenes
 vuitens
-vuitè
-vàreig
-vàrem
-vàreu
-vés
-vós
 xano-xano
 xau-xau
-xec
-àdhuc
-àlies
-ça
-ço
-érem
-éreu
-és
-éssent
-ésser
-ídem
-òlim
-últim
-última
-últimes
-últims
-únic
-única
-únics
-úniques
-ús
+xec
\ No newline at end of file
diff --git a/apps/common/src/python/mediawords/languages/ca/ca_stop_words_old.txt b/apps/common/src/python/mediawords/languages/ca/ca_stop_words_old.txt
new file mode 100644
index 0000000000..eaf6168385
--- /dev/null
+++ b/apps/common/src/python/mediawords/languages/ca/ca_stop_words_old.txt
@@ -0,0 +1,781 @@
+#
+# This is a stop word list for the Catalan language.
+#
+# Sources:
+#     https://raw.githubusercontent.com/stopwords-iso/stopwords-ca/master/stopwords-ca.txt
+#     http://latel.upf.edu/morgana/altres/pub/ca_stop.htm
+#     https://www.ranks.nl/stopwords/catalan
+#
+
+a
+abans
+abans-d'ahir
+abintestat
+ací
+adesiara
+adàgio
+adés
+adéu
+ah
+ahir
+ai
+aitambé
+aitampoc
+aitan
+aitant
+aitantost
+aixà
+així
+això
+al
+aleshores
+algun
+alguna
+algunes
+alguns
+algú
+alhora
+allà
+allèn
+allí
+allò
+almenys
+als
+alto
+altra
+altre
+altres
+altresí
+altri
+al·legro
+alça
+amargament
+amb
+ambdues
+ambdós
+amunt
+amén
+anar
+anc
+andante
+andantino
+anit
+ans
+antany
+apa
+aprés
+aqueix
+aqueixa
+aqueixes
+aqueixos
+aqueixs
+aquell
+aquella
+aquelles
+aquells
+aquest
+aquesta
+aquestes
+aquests
+aquèn
+aquí
+ara
+arran
+arrera
+arrere
+arreu
+arri
+arruix
+atxim
+au
+avall
+avant
+aviat
+avui
+açò
+bah
+baix
+baldament
+ballmanetes
+banzim-banzam
+bastant
+bastants
+ben
+bis
+bitllo-bitllo
+bo
+bé
+ca
+cada
+cadascuna
+cadascunes
+cadascuns
+cadascú
+cal
+cap
+car
+caram
+catorze
+cent
+centes
+cents
+cerca
+cert
+certa
+certes
+certs
+cinc
+cinquanta
+cinquena
+cinquenes
+cinquens
+cinquè
+com
+comsevulla
+consegueixo
+conseguim
+conseguir
+consigueix
+consigueixen
+consigueixes
+contra
+cordons
+corrents
+cric-crac
+d
+d'un
+d'una
+d'unes
+d'uns
+daixonses
+daixò
+dallonses
+dallò
+dalt
+daltabaix
+damunt
+darrera
+darrere
+davall
+davant
+de
+debades
+dedins
+defora
+dejorn
+dejús
+del
+dellà
+dels
+dementre
+dempeus
+demà
+demés
+des
+des de
+desena
+desenes
+desens
+després
+dessobre
+dessota
+dessús
+desè
+deu
+devers
+devora
+deçà
+diferents
+dinou
+dins
+dintre
+disset
+divers
+diversa
+diverses
+diversos
+divuit
+donat
+doncs
+dos
+dotze
+dues
+durant
+e
+ecs
+eh
+el
+ela
+elis
+ell
+ella
+elles
+ells
+els
+em
+emperò
+en
+enans
+enant
+encara
+encontinent
+endalt
+endarrera
+endarrere
+endavant
+endebades
+endemig
+endemà
+endemés
+endins
+endintre
+enfora
+engir
+enguany
+enguanyasses
+enjús
+enlaire
+enlloc
+enllà
+enrera
+enrere
+ens
+ensems
+ensota
+ensús
+entorn
+entre
+entremig
+entretant
+entrò
+envers
+envides
+environs
+enviró
+ençà
+ep
+era
+erem
+eren
+eres
+ergo
+es
+escar
+essent
+esser
+est
+esta
+estada
+estades
+estan
+estant
+estar
+estaran
+estarem
+estareu
+estaria
+estarien
+estaries
+estarà
+estaràs
+estaré
+estaríem
+estaríeu
+estat
+estats
+estava
+estaven
+estaves
+estem
+estes
+esteu
+estic
+estiguem
+estigueren
+estigueres
+estigues
+estiguessis
+estigueu
+estigui
+estiguin
+estiguis
+estigué
+estiguérem
+estiguéreu
+estigués
+estiguí
+estos
+està
+estàs
+estàvem
+estàveu
+et
+etc
+etcètera
+ets
+excepte
+fa
+faig
+fan
+fas
+fem
+fer
+fer	faig
+feu
+fi
+fins
+fora
+foren
+fores
+força
+fos
+fossin
+fossis
+fou
+fra
+fui
+fóra
+fórem
+fóreu
+fóssim
+fóssiu
+gaire
+gairebé
+gaires
+gens
+girientorn
+gratis
+ha
+hagi
+hagin
+hagis
+haguda
+hagudes
+hagueren
+hagueres
+haguessin
+haguessis
+hagut
+haguts
+hagué
+haguérem
+haguéreu
+hagués
+haguéssim
+haguéssiu
+haguí
+hala
+han
+has
+hauran
+haurem
+haureu
+hauria
+haurien
+hauries
+haurà
+hauràs
+hauré
+hauríem
+hauríeu
+havem
+havent
+haver
+haveu
+havia
+havien
+havies
+havíem
+havíeu
+he
+hem
+heu
+hi
+ho
+hom
+hui
+hàgim
+hàgiu
+i
+igual
+iguals
+inclusive
+inclòs
+ja
+jamai
+jo
+l
+l'hi
+la
+leri-leri
+les
+li
+li'n
+lla
+llarg
+llavors
+llevat
+lluny
+llur
+llurs
+lo
+los
+ls
+m
+m'he
+ma
+mai
+mal
+malament
+malgrat
+manco
+mant
+manta
+mantes
+mantinent
+mants
+massa
+mateix
+mateixa
+mateixes
+mateixos
+me
+mentre
+mentrestant
+menys
+mes
+meu
+meua
+meues
+meus
+meva
+meves
+mi
+mig
+mil
+mitges
+mitja
+mitjançant
+mitjos
+mode
+moixoni
+molt
+molta
+moltes
+molts
+mon
+mons
+mos
+més
+n
+n'he
+n'hi
+na
+ne
+ni
+ningú
+no
+nogensmenys
+només
+noranta
+nos
+nosaltres
+nostra
+nostre
+nostres
+nou
+novena
+novenes
+novens
+novè
+ns
+nòs
+nós
+o
+oh
+oi
+oidà
+on
+onsevulga
+onsevulla
+onze
+pas
+pel
+pels
+pengim-penjam
+per
+per que
+perquè
+pertot
+però
+piano
+pla
+poc
+poca
+pocs
+podem
+poden
+poder
+podeu
+poques
+potser
+prest
+primer
+primera
+primeres
+primers
+pro
+prompte
+prop
+propi
+prou
+puc
+puix
+pus
+pàssim
+qual
+quals
+qualsevol
+qualsevulla
+qualssevol
+qualssevulla
+quan
+quant
+quanta
+quantes
+quants
+quaranta
+quart
+quarta
+quartes
+quarts
+quasi
+quatre
+que
+quelcom
+qui
+quin
+quina
+quines
+quins
+quinze
+quisvulla
+què
+ran
+re
+rebé
+renoi
+rera
+rere
+res
+retruc
+s
+s'ha
+s'han
+sa
+sabem
+saben
+saber
+sabeu
+salvament
+salvant
+salvat
+sap
+saps
+se
+segon
+segona
+segones
+segons
+seguida
+seixanta
+semblant
+semblants
+sempre
+sengles
+sens
+sense
+ser
+seran
+serem
+sereu
+seria
+serien
+series
+serà
+seràs
+seré
+seríem
+seríeu
+ses
+set
+setanta
+setena
+setenes
+setens
+setze
+setè
+seu
+seua
+seues
+seus
+seva
+seves
+si
+sia
+siau
+sic
+siguem
+sigues
+sigueu
+sigui
+siguin
+siguis
+sinó
+sis
+sisena
+sisenes
+sisens
+sisè
+sobre
+sobretot
+soc
+sol
+sola
+solament
+soles
+sols
+som
+son
+sons
+sos
+sota
+sots
+sou
+sovint
+suara
+sí
+sóc
+són
+t
+t'ha
+t'han
+t'he
+ta
+tal
+tals
+també
+tampoc
+tan
+tanmateix
+tant
+tanta
+tantes
+tantost
+tants
+te
+tene
+tenim
+tenir
+teniu
+tercer
+tercera
+terceres
+tercers
+tes
+teu
+teua
+teues
+teus
+teva
+teves
+tinc
+ton
+tons
+tos
+tost
+tostemps
+tot
+tota
+total
+totes
+tothom
+tothora
+tots
+trenta
+tres
+tret
+tretze
+tu
+tururut
+u
+uf
+ui
+uix
+ultra
+un
+una
+unes
+uns
+up
+upa
+us
+va
+vagi
+vagin
+vagis
+vaig
+vair
+vam
+van
+vares
+vas
+vau
+vem
+verbigràcia
+vers
+vet
+veu
+vint
+vora
+vos
+vosaltres
+vostra
+vostre
+vostres
+vostè
+vostès
+vuit
+vuitanta
+vuitena
+vuitenes
+vuitens
+vuitè
+vàreig
+vàrem
+vàreu
+vés
+vós
+xano-xano
+xau-xau
+xec
+àdhuc
+àlies
+ça
+ço
+érem
+éreu
+és
+éssent
+ésser
+ídem
+òlim
+últim
+última
+últimes
+últims
+únic
+única
+únics
+úniques
+ús
diff --git a/apps/common/src/python/mediawords/languages/da/da_stop_words.txt b/apps/common/src/python/mediawords/languages/da/da_stop_words.txt
index 220a35602a..ea271bda3a 100644
--- a/apps/common/src/python/mediawords/languages/da/da_stop_words.txt
+++ b/apps/common/src/python/mediawords/languages/da/da_stop_words.txt
@@ -1,16 +1,20 @@
-#
-# This is a stop word list for the Danish language.
-#
 # Sources:
-#     http://search.cpan.org/~creamyg/Lingua-StopWords-0.09/
 #
+#     http://search.cpan.org/~creamyg/Lingua-StopWords-0.09/
+#     https://github.com/stopwords-iso/stopwords-da/blob/master/stopwords-da.txt
+# (Lightly edited to remove words in the original lists that are actually meaningful)
 
 ad
 af
+aldrig
 alle
 alt
 anden
+andet
+andre
 at
+bare
+begge
 blev
 blive
 bliver
@@ -25,77 +29,148 @@ det
 dette
 dig
 din
+dine
 disse
+dit
 dog
 du
 efter
+ej
 eller
 en
 end
+ene
+eneste
+enhver
 er
 et
+få
+far
+får
+fem
+fik
+fire
+flere
+fleste
 for
+før
+fordi
+forrige
 fra
+god
+godt
 ham
 han
 hans
 har
 havde
 have
+hej
+helt
 hende
 hendes
 her
 hos
 hun
 hvad
+hvem
+hver
+hvilken
 hvis
 hvor
+hvordan
+hvorfor
+hvornår
 i
 ikke
 ind
+ingen
+intet
+ja
 jeg
 jer
+jeres
 jo
+kan
+kom
+komme
+kommer
+kun
 kunne
+lad
+lav
+lidt
+lige
+lille
+må
 man
+mand
 mange
 med
 meget
 men
+mens
+mere
 mig
 min
 mine
 mit
 mod
+når
+nær
+næste
+næsten
 ned
+nej
+ni
+nogen
 noget
 nogle
 nu
-når
+ny
+nyt
 og
 også
+okay
 om
 op
 os
+otte
 over
 på
+så
+sådan
+se
+seks
 selv
+ser
+ses
 sig
+sige
 sin
 sine
 sit
 skal
 skulle
 som
-sådan
+stor
+store
+syv
+tag
+tage
 thi
+ti
 til
+to
+tre
 ud
 under
 var
+være
+været
+ved
 vi
 vil
 ville
 vor
-være
-været
+vores
\ No newline at end of file
diff --git a/apps/common/src/python/mediawords/languages/da/da_stop_words_old.txt b/apps/common/src/python/mediawords/languages/da/da_stop_words_old.txt
new file mode 100644
index 0000000000..220a35602a
--- /dev/null
+++ b/apps/common/src/python/mediawords/languages/da/da_stop_words_old.txt
@@ -0,0 +1,101 @@
+#
+# This is a stop word list for the Danish language.
+#
+# Sources:
+#     http://search.cpan.org/~creamyg/Lingua-StopWords-0.09/
+#
+
+ad
+af
+alle
+alt
+anden
+at
+blev
+blive
+bliver
+da
+de
+dem
+den
+denne
+der
+deres
+det
+dette
+dig
+din
+disse
+dog
+du
+efter
+eller
+en
+end
+er
+et
+for
+fra
+ham
+han
+hans
+har
+havde
+have
+hende
+hendes
+her
+hos
+hun
+hvad
+hvis
+hvor
+i
+ikke
+ind
+jeg
+jer
+jo
+kunne
+man
+mange
+med
+meget
+men
+mig
+min
+mine
+mit
+mod
+ned
+noget
+nogle
+nu
+når
+og
+også
+om
+op
+os
+over
+på
+selv
+sig
+sin
+sine
+sit
+skal
+skulle
+som
+sådan
+thi
+til
+ud
+under
+var
+vi
+vil
+ville
+vor
+være
+været
diff --git a/apps/common/src/python/mediawords/languages/de/de_stop_words.txt b/apps/common/src/python/mediawords/languages/de/de_stop_words.txt
index aad240c48c..57a23fd1e9 100644
--- a/apps/common/src/python/mediawords/languages/de/de_stop_words.txt
+++ b/apps/common/src/python/mediawords/languages/de/de_stop_words.txt
@@ -1,16 +1,27 @@
-#
-# This is a stop word list for the German language.
-#
 # Sources:
-#     http://search.cpan.org/~creamyg/Lingua-StopWords-0.09/
 #
+#     http://search.cpan.org/~creamyg/Lingua-StopWords-0.09/
+#     https://github.com/stopwords-iso/stopwords-de/blob/master/stopwords-de.txt
+# (Lightly edited to remove words in the original lists that are actually meaningful)
 
+a
+ab
 aber
+ach
+acht
+achte
+achten
+achter
+achtes
+ag
 alle
+allein
 allem
 allen
 aller
+allerdings
 alles
+allgemeinen
 als
 also
 am
@@ -25,19 +36,65 @@ anderm
 andern
 anderr
 anders
+au
 auch
 auf
 aus
+ausser
+ausserdem
+außer
+außerdem
+b
+bald
 bei
+beide
+beiden
+beim
+beispiel
+bekannt
+bereits
+besonders
+besser
+besten
 bin
 bis
+bisher
 bist
+c
+d
+d.h
 da
+dabei
+dadurch
+dafür
+dagegen
+daher
+dahin
+dahinter
+damals
 damit
+danach
+daneben
+dank
 dann
+daran
+darauf
+daraus
+darf
+darfst
+darin
+darüber
+darum
+darunter
 das
+dasein
+daselbst
+dass
 dasselbe
+davon
+davor
 dazu
+dazwischen
 daß
 dein
 deine
@@ -46,19 +103,34 @@ deinen
 deiner
 deines
 dem
+dementsprechend
+demgegenüber
+demgemäss
+demgemäß
 demselben
+demzufolge
 den
+denen
 denn
 denselben
 der
+deren
 derer
+derjenige
+derjenigen
+dermassen
+dermaßen
 derselbe
 derselben
 des
+deshalb
 desselben
 dessen
+deswegen
 dich
 die
+diejenige
+diejenigen
 dies
 diese
 dieselbe
@@ -70,9 +142,32 @@ dieses
 dir
 doch
 dort
+drei
+drin
+dritte
+dritten
+dritter
+drittes
 du
 durch
+durchaus
+dürfen
+dürft
+durfte
+durften
+e
+eben
+ebenso
+ehrlich
+ei
+ei,
+eigen
+eigene
+eigenen
+eigener
+eigenes
 ein
+einander
 eine
 einem
 einen
@@ -85,8 +180,21 @@ einigen
 einiger
 einiges
 einmal
+eins
+elf
+en
+ende
+endlich
+entweder
 er
+ernst
+erst
+erste
+ersten
+erster
+erstes
 es
+etwa
 etwas
 euch
 euer
@@ -95,18 +203,75 @@ eurem
 euren
 eurer
 eures
+f
+folgende
+früher
+fünf
+fünfte
+fünften
+fünfter
+fünftes
 für
+g
+gab
+ganz
+ganze
+ganzen
+ganzer
+ganzes
+gar
+gedurft
 gegen
+gegenüber
+gehabt
+gehen
+geht
+gekannt
+gekonnt
+gemacht
+gemocht
+gemusst
+genug
+gerade
+gern
+gesagt
+geschweige
 gewesen
+gewollt
+geworden
+gibt
+ging
+gross
+grosse
+grosser
+grosses
+große
+großer
+großes
+gut
+guter
+gutes
+h
 hab
 habe
 haben
+habt
+hast
 hat
 hatte
+hätte
 hatten
+hätten
+hattest
+hattet
+heisst
+her
+heute
 hier
 hin
 hinter
+hoch
+i
 ich
 ihm
 ihn
@@ -118,78 +283,219 @@ ihren
 ihrer
 ihres
 im
+immer
 in
 indem
+infolgedessen
 ins
+irgend
 ist
+j
+ja
+jahr
+jahre
+jahren
+je
 jede
 jedem
 jeden
 jeder
+jedermann
+jedermanns
 jedes
+jedoch
+jemand
+jemandem
+jemanden
 jene
 jenem
 jenen
 jener
 jenes
 jetzt
+k
+kam
 kann
+kannst
+kaum
 kein
 keine
 keinem
 keinen
 keiner
 keines
+kleine
+kleinen
+kleiner
+kleines
+kommen
+kommt
 können
+könnt
+konnte
 könnte
+konnten
+kurz
+l
+lang
+lange
+leicht
+leide
+lieber
+los
+m
 machen
+macht
+machte
+mag
+magst
+mahn
+mal
 man
 manche
 manchem
 manchen
 mancher
 manches
+mann
+mehr
 mein
 meine
 meinem
 meinen
 meiner
 meines
+mensch
+menschen
 mich
 mir
 mit
+mittel
+mochte
+möchte
+mochten
+mögen
+möglich
+mögt
+morgen
 muss
+müssen
+musst
+müsst
 musste
+mussten
+muß
+mußt
+müßt
+n
+na
 nach
+nachdem
+nahm
+natürlich
+neben
+nein
+neue
+neuen
+neun
+neunte
+neunten
+neunter
+neuntes
 nicht
 nichts
+nie
+niemand
+niemandem
+niemanden
 noch
 nun
 nur
+o
 ob
+oben
 oder
+offen
+oft
 ohne
+p
+q
+r
+recht
+rechte
+rechten
+rechter
+rechtes
+s
+sa
+sache
+sagt
+sagte
+sah
+schlecht
+schluss
+sechs
+sechste
+sechsten
+sechster
+sechstes
 sehr
+sei
+seid
+seien
 sein
 seine
 seinem
 seinen
 seiner
 seines
+seit
+seitdem
 selbst
 sich
 sie
+sieben
+siebente
+siebenten
+siebenter
+siebentes
 sind
 so
+solang
 solche
 solchem
 solchen
 solcher
 solches
 soll
+sollen
+sollst
+sollt
 sollte
+sollten
 sondern
 sonst
+soweit
+sowie
+später
+startseite
+statt
+steht
+suche
+t
+tag
+tage
+tagen
+tat
+teil
+tel
+tritt
+trotzdem
+tun
+u
+über
+überhaupt
+übrigens
 um
 und
 uns
@@ -197,42 +503,111 @@ unse
 unsem
 unsen
 unser
+unsere
+unserer
 unses
 unter
+v
+vergangenen
 viel
+viele
+vielem
+vielen
+vielleicht
+vier
+vierte
+vierten
+vierter
+viertes
 vom
 von
 vor
+w
+wahr
+während
+währenddem
+währenddessen
+wann
 war
+wäre
 waren
 warst
+wart
+warum
 was
 weg
+wegen
 weil
+weit
 weiter
+weitere
+weiteren
+weiteres
 welche
 welchem
 welchen
 welcher
 welches
+wem
+wen
+wenig
+wenige
+weniger
+weniges
+wenigstens
 wenn
+wer
 werde
 werden
+werdet
+weshalb
+wessen
 wie
 wieder
+wieso
 will
+willst
 wir
 wird
+wirklich
 wirst
 wo
+woher
+wohin
+wohl
 wollen
+wollt
 wollte
-während
+wollten
+worden
+wurde
 würde
+wurden
 würden
+x
+y
+z
+z.b
+zehn
+zehnte
+zehnten
+zehnter
+zehntes
+zeit
 zu
+zuerst
+zugleich
 zum
+zunächst
 zur
+zurück
+zusammen
+zwanzig
 zwar
+zwei
+zweite
+zweiten
+zweiter
+zweites
 zwischen
-über
+zwölf
\ No newline at end of file
diff --git a/apps/common/src/python/mediawords/languages/de/de_stop_words_old.txt b/apps/common/src/python/mediawords/languages/de/de_stop_words_old.txt
new file mode 100644
index 0000000000..aad240c48c
--- /dev/null
+++ b/apps/common/src/python/mediawords/languages/de/de_stop_words_old.txt
@@ -0,0 +1,238 @@
+#
+# This is a stop word list for the German language.
+#
+# Sources:
+#     http://search.cpan.org/~creamyg/Lingua-StopWords-0.09/
+#
+
+aber
+alle
+allem
+allen
+aller
+alles
+als
+also
+am
+an
+ander
+andere
+anderem
+anderen
+anderer
+anderes
+anderm
+andern
+anderr
+anders
+auch
+auf
+aus
+bei
+bin
+bis
+bist
+da
+damit
+dann
+das
+dasselbe
+dazu
+daß
+dein
+deine
+deinem
+deinen
+deiner
+deines
+dem
+demselben
+den
+denn
+denselben
+der
+derer
+derselbe
+derselben
+des
+desselben
+dessen
+dich
+die
+dies
+diese
+dieselbe
+dieselben
+diesem
+diesen
+dieser
+dieses
+dir
+doch
+dort
+du
+durch
+ein
+eine
+einem
+einen
+einer
+eines
+einig
+einige
+einigem
+einigen
+einiger
+einiges
+einmal
+er
+es
+etwas
+euch
+euer
+eure
+eurem
+euren
+eurer
+eures
+für
+gegen
+gewesen
+hab
+habe
+haben
+hat
+hatte
+hatten
+hier
+hin
+hinter
+ich
+ihm
+ihn
+ihnen
+ihr
+ihre
+ihrem
+ihren
+ihrer
+ihres
+im
+in
+indem
+ins
+ist
+jede
+jedem
+jeden
+jeder
+jedes
+jene
+jenem
+jenen
+jener
+jenes
+jetzt
+kann
+kein
+keine
+keinem
+keinen
+keiner
+keines
+können
+könnte
+machen
+man
+manche
+manchem
+manchen
+mancher
+manches
+mein
+meine
+meinem
+meinen
+meiner
+meines
+mich
+mir
+mit
+muss
+musste
+nach
+nicht
+nichts
+noch
+nun
+nur
+ob
+oder
+ohne
+sehr
+sein
+seine
+seinem
+seinen
+seiner
+seines
+selbst
+sich
+sie
+sind
+so
+solche
+solchem
+solchen
+solcher
+solches
+soll
+sollte
+sondern
+sonst
+um
+und
+uns
+unse
+unsem
+unsen
+unser
+unses
+unter
+viel
+vom
+von
+vor
+war
+waren
+warst
+was
+weg
+weil
+weiter
+welche
+welchem
+welchen
+welcher
+welches
+wenn
+werde
+werden
+wie
+wieder
+will
+wir
+wird
+wirst
+wo
+wollen
+wollte
+während
+würde
+würden
+zu
+zum
+zur
+zwar
+zwischen
+über
diff --git a/apps/common/src/python/mediawords/languages/en/en_stop_words.txt b/apps/common/src/python/mediawords/languages/en/en_stop_words.txt
index eec3311701..b69d36a7f3 100644
--- a/apps/common/src/python/mediawords/languages/en/en_stop_words.txt
+++ b/apps/common/src/python/mediawords/languages/en/en_stop_words.txt
@@ -1,1399 +1,321 @@
-#
 # This is a "long" stop word list for the English language.
 #
 # Sources:
 #     http://search.cpan.org/~creamyg/Lingua-StopWords-0.09/
+#     http://www.lextek.com/manuals/onix/stopwords1.html
+#     http://xpo6.com/list-of-english-stop-words/
+#     https://countwordsfree.com/stopwords
+#     https://gist.github.com/sebleier/554280 (NLTK stop words)
 #     https://github.com/arc12/Text-Mining-Weak-Signals/wiki/Standard-set-of-english-stopwords
+#     https://github.com/berkmancenter/mediacloud-sentence-splitter/blob/develop/sentence_splitter/non_breaking_prefixes/en.txt
+#     https://github.com/stopwords-iso/stopwords-en/blob/master/stopwords-en.txt
 #     https://www.link-assistant.com/seo-stop-words.html
-#     some English non-breaking prefixes
-#
+#     https://www.ranks.nl/stopwords
+# (Lightly edited to remove words in the original lists that are actually meaningful)
 
+'ll
+'tis
+'twas
+'ve
+10
+39
+A
+Adj
+Adm
+Adv
+Apr
+Art
+Asst
+Aug
+B
+Bart
+Bldg
+Brig
+C
+Capt
+Cmdr
+Co
+Col
+Comdr
+Con
+Corp
+Cpl
+D
+DR
+Dec
+Dr
+Drs
+E
+Ens
+F
+Feb
+Fig
+G
+Gen
+Gov
+H
+Hon
+Hosp
+Hr
+I
+I'd
+I'll
+I'm
+I've
+Inc
+Insp
+J
+Jan
+Jr
+Jul
+Jun
+K
+L
+Lt
+M
+MM
+MR
+MRS
+MS
+Maj
+Mar
+Messrs
+Mlle
+Mme
+Mr
+Mrs
+Ms
+Msgr
+N
+No
+Nos
+Nov
+Nr
+O
+Oct
+Okt
+Op
+Ord
+P
+Pfc
+Ph
+Ph.D
+PhD
+Prof
+Pvt
+Q
+R
+Rep
+Reps
+Res
+Rev
+Rt
+S
+Sen
+Sens
+Sep
+Sept
+Sfc
+Sgt
+Sr
+St
+Supt
+Surg
+T
+U
+V
+W
+X
+Y
+Z
 a
 a's
-abandoned
-abbr
-ability
 able
-aboard
+ableabout
 about
 above
-abroad
-absence
-absent
-absolute
-absolutely
-absorbed
-abstract
-academic
-accept
-acceptable
-acceptance
-accepted
-accepting
-access
-accident
-accompanied
-accomplish
-accomplished
+abst
 accordance
 according
 accordingly
-account
-accounts
-accuracy
-accurate
-accurately
-accused
-achieve
-achieved
-achievement
-achievements
-acquire
-acquired
 across
 act
-acting
-action
-actions
-active
-activities
-activity
-actor
-acts
-actual
 actually
-ad
-add
 added
-adding
-addition
-additional
-address
-addressed
-addresses
-adequate
 adj
-adjusted
-adjustment
-adjustments
-adm
-administration
-admission
-admit
-admitted
-adopted
-adult
-adults
-adv
-advance
-advanced
-advantage
-advantages
-advertising
-advice
-advised
-aesthetic
-affair
-affairs
-affect
 affected
+affecting
 affects
-afford
-afraid
 after
-afternoon
 afterwards
 again
 against
-age
-agencies
-agency
-agent
-agents
-ages
 ago
-agree
-agreed
+ah
 ahead
-aid
-aids
-aim
-aimed
 ain't
-air
-al
-alert
-alienation
-align
-alike
-alive
+aint
 all
-alliance
-allied
-allies
-allotment
-allow
-allowances
-allowed
-allowing
-allows
 almost
 alone
 along
 alongside
 already
 also
-altered
-alternative
 although
-altogether
 always
 am
-amazing
-ambiguous
-amendment
 amid
 amidst
 among
 amongst
+amoungst
 amount
-amounts
 an
-analysis
-ancient
 and
-anger
-angle
-angry
-animal
-animals
-anniversary
-announced
-announcement
-annual
-anode
+announce
 another
-answer
-answered
-answers
-anti-Semitism
-anti-trust
-anticipated
-anticipation
-anxiety
-anxious
 any
 anybody
 anyhow
+anymore
 anyone
 anything
 anyway
 anyways
 anywhere
 apart
-apartment
-apparatus
-apparent
 apparently
-appeal
 appear
-appearance
-appeared
-appears
-apple
-application
-applications
-applied
-apply
-applying
-appointed
-appointment
-appreciate
-appreciation
-approach
-approached
-approaches
-approaching
-appropriate
-approval
-approved
 approximately
-apr
-april
-arbitrary
-arc
-architect
 are
 area
 areas
 aren
 aren't
-argue
-argued
-argument
+arent
 arise
-arm
 around
-aroused
-arrange
-arranged
-arrangement
-arrangements
-arrest
-arrested
-arrival
-arrive
-arrived
-art
-artery
-article
-articles
-artist
-artistic
-artists
-arts
 as
 aside
 ask
 asked
 asking
 asks
-asleep
-aspect
-aspects
-assembled
-assessment
-assessors
-assigned
-assignment
-assist
-assistance
-assistant
-associate
 associated
-association
-asst
-assume
-assumed
-assumption
-assumptions
-assure
-assured
-astronomy
 at
-atmosphere
-atom
-atomic
-atoms
-attached
-attack
-attacked
-attacks
-attain
-attempt
-attempted
-attempting
-attempts
-attend
-attended
-attending
-attention
-attitude
-attitudes
-attorney
-attract
-attracted
-attractive
-audience
-aug
-august
-aunt
-authentic
-author
-authorities
-authority
-authorized
-authors
-auto
-automatic
-automatically
-automobile
-automobiles
-autumn
-availability
+auth
 available
-average
-avoid
-avoided
-awake
-award
-aware
-awareness
 away
 awfully
-axis
 b
-baby
-back
-backed
-background
-backing
-backs
-backward
-backwards
-bad
-badly
-bag
-balance
-balanced
-ball
-band
-bank
-banks
-bar
-bare
-barely
-barn
-barrel
-bars
-bart
-base
-baseball
-based
-basement
-bases
-basic
-basically
-basis
-bat
-bath
-battle
-bay
 be
-beach
-bear
-beard
-bearing
-beat
-beautiful
-beauty
 became
 because
 become
 becomes
 becoming
-bed
-bedroom
-beef
 been
-beer
 before
 beforehand
-began
-begin
-beginning
-begins
-begun
-behalf
-behavior
-behind
 being
 beings
-belief
-beliefs
 believe
-believed
-believes
-bell
-belly
-belong
-belongs
 below
-bench
-beneath
-benefit
-benefits
-bent
 beside
 besides
 best
-bet
 better
 between
 beyond
-bgcolor
-bid
-big
-bigger
-biggest
-bill
-billion
-bills
-binding
-binomial
-biological
-bird
-birds
-birth
-bit
-bitter
-black
-blame
-blanket
-bldg
-blind
-block
-blockquote
-blocks
-blog
-blonde
-blood
-blow
-blue
-blues
-board
-boards
-boat
-boating
-boats
-bod
-bodies
-body
-bold
-bomb
-bombs
-bond
-bonds
-bone
-bones
-book
-books
-border
-bore
-born
-boss
 both
-bother
-bottle
-bottom
-bought
-bound
-box
-boy
-boys
-br
-branch
-branches
-brave
-bread
-break
-breakfast
-breaking
-breath
-breathing
-brick
-bride
-bridge
-bridges
-brief
-briefly
-brig
-bright
-brilliant
-bring
-bringing
-brings
-broad
-broke
-broken
-bronchial
-bros
-brother
-brought
-brown
-browser
-brush
-brushed
-budget
-build
-builder
-building
-buildings
-built
-bullet
-bullets
-bundle
-burden
-bureau
-burn
-burned
-burning
-burns
-burst
-bus
-business
-businesses
-busy
 but
-butter
 buy
-buying
 by
 c
 c'mon
 c's
-cafe
-calculated
-calendar
 call
-called
-calls
-calm
 came
-camera
-camp
 can
 can't
-candidate
-candidates
 cannot
 cant
-cap
-capabilities
-capable
-capacity
-capital
-capt
-captain
 caption
-car
-carbon
-card
-care
-career
-careful
-carefully
-carried
-carries
-carry
-carrying
-cars
 case
 cases
-cash
-cast
-casual
-cat
-catch
-categories
-category
-cattle
-caught
 cause
-caused
 causes
-cdt
-ceiling
-cell
-cellar
-cells
-cent
-center
-centers
-central
-cents
-centuries
-century
 certain
 certainly
-certainty
-cf
-chain
-chair
-chairman
-chairs
-challenge
-champion
-chance
-chances
-change
-changed
-changes
-changing
-channels
-chapel
-char
-character
-characteristic
-characteristics
-characterized
-characters
-charge
-charged
-charges
-charm
-charming
-charoff
-chart
-charter
-cheap
-check
-checked
-cheek
-chemical
-chest
-chick
-chicken
-chief
-chiefly
-chlorine
-choice
-cholesterol
-choose
-chord
-chose
-chosen
-church
-churches
-cigarette
-circle
-circles
-circular
-circumstances
-cite
-cited
-cities
-citizen
-citizens
-city
-civic
-civil
-civilian
-civilization
-claim
-claimed
-claims
-clarity
-class
-classes
-classic
-classical
-classification
-clean
-cleaning
-clear
-cleared
-clearly
-clerk
-click
-climb
-climbed
-clinical
-clock
-close
-closed
-closely
-closer
-closing
-cloth
-clothe
-clothes
-clothing
-cloud
-clouds
-club
-cmdr
+cmon
 co
 co.
-coach
-coast
-coat
-coating
-cocktail
-code
-coffee
-col
-cold
-colleagues
-collect
-collected
-collection
-collective
-colonel
-colonial
-colony
-color
-colored
-colorful
-colors
-column
-columns
 com
-combat
-combination
-combined
-comdr
 come
-comedy
 comes
-comfort
-comfortable
-coming
-command
-commander
-comment
-comments
-commerce
-commercial
-commissioner
-committed
-committee
-commodities
-common
-commonly
-communication
-communications
-communism
-communities
-community
-companies
-companion
-company
-comparable
-compare
-compared
-comparison
-compete
-competition
-competitive
-complained
-complement
-complete
-completed
-completely
-completion
-complex
-complicated
-component
-components
-composed
-composer
-composition
-compromise
-computed
-con
-conceived
-concentrated
-concentration
-concept
-conception
-concepts
-concern
-concerned
 concerning
-concerns
-concert
-concerts
-concluded
-conclusion
-conclusions
-concrete
-condemned
-condition
-conditioned
-conditions
-conduct
-conducted
-conductor
-conference
-conferences
-confidence
-confirmed
-conflict
-confronted
-confused
-confusion
-congressional
-connect
-connected
-connection
-conscience
-conscious
-consciousness
-consequence
-consequences
 consequently
 consider
-considerable
-considerably
-consideration
-considerations
-considered
 considering
-consisted
-consistent
-consistently
-consisting
-consists
-consonant
-conspiracy
-constant
-constantly
-constitute
-constitutional
-constructed
-construction
-consumer
-contact
-contacts
 contain
-contained
 containing
 contains
-contemporary
-content
-contest
-context
-continent
-continually
-continue
-continued
-continues
-continuing
-continuity
-continuous
-continuously
-contract
-contracts
-contrary
-contrast
-contribute
-contributed
-contributions
-control
-controlled
-controlling
-controls
-controversy
-convenience
-convenient
-conventional
-conversation
-conversion
-converted
-conviction
-convictions
-convinced
-cook
-cooking
-cool
-cooling
-cooperative
-cope
-copy
-core
-corn
-corner
-corp
-correct
-correspondence
 corresponding
-cost
-costs
-cottage
-cotton
 could
+could've
+couldn
 couldn't
-council
-count
-counter
-counties
-countries
-country
-county
-couple
-courage
-course
-courses
-court
-courts
-cousin
-cover
-coverage
-covered
-covering
-covers
-cow
-cpl
-crack
-craft
-crash
-crawled
-crazy
-cream
-crease
-create
-created
-creating
-creation
-creative
-creatures
-credit
-crew
-critic
-critical
-criticism
-critics
-crop
-cross
-crossed
-crossing
-crowd
-crowded
-crown
-crucial
+couldnt
 cry
-cst
-cultural
-culture
-cure
-curiosity
-curious
-current
 currently
-curt
-curve
-customer
-customers
-cut
-cuts
-cutting
-cycle
 d
-D
-dad
-daily
-damage
-damn
-dance
-dancer
-dancers
-dances
-dancing
-danger
-dangerous
 dare
 daren't
-dark
-darkness
-data
+darent
 date
-dates
-datetime
-daughter
-dawn
-day
-days
-dead
-deal
-dealer
-dealers
-dealing
-dealt
 dear
-death
-debate
-dec
-decade
-decades
-december
-decent
-decide
-decided
-decimal
-decision
-decisions
-deck
-declaration
-declared
-decline
-dedicated
-dedication
-deep
-deeper
-deeply
-defeat
-defend
-defense
-define
-defined
-definite
 definitely
-definition
-degree
-degrees
-del
-delay
-delayed
-deliberately
-delicate
-delight
-delightful
-delivered
-delivery
-demand
-demanded
-demanding
-demands
-democracy
-demonstrate
-demonstrated
-demonstration
-denied
-density
-deny
-department
-departments
-depend
-dependent
-depending
-depends
-depression
-depth
-derived
-describe
-described
-describes
-description
-desegregation
-desert
-design
-designed
-designs
-desirable
-desire
-desired
-desires
-desk
-despair
-desperate
-desperately
 despite
-destiny
-destroy
-destroyed
-destruction
-destructive
-detail
-detailed
-details
-detergent
-determination
-determine
-determined
-determining
-develop
-developed
-developing
-development
-developments
-device
-devices
-devil
-devoted
-diameter
-dictionary
 did
 didn
 didn't
-die
-died
-diet
+didnt
 differ
-difference
-differences
 different
 differently
-difficult
-difficulties
-difficulty
-diffusion
-dignity
-dilemma
-dimensions
-dining
-dinner
-diplomatic
-dir
-direct
-directed
-direction
-directions
 directly
-director
-directors
-dirt
-dirty
-disappeared
-disaster
-discharge
-discipline
-discuss
-discussed
-discussion
-discussions
-disease
-dishes
-disk
-displacement
-display
-displayed
-displays
-disposal
-dispute
-distance
-distant
-distinct
-distinction
-distinctive
-distinguished
-distributed
-distribution
-districts
-disturbed
-div
-dive
-divide
-divided
-divine
-division
-divorce
 do
-doctor
-doctors
-doctrine
-documents
 does
 doesn
 doesn't
-dog
-dogs
+doesnt
 doing
-dollar
-dollars
-domestic
-dominant
-dominated
 don
 don't
 done
-door
-doors
-double
-doubt
-doubtful
-down
-downed
-downing
-downs
-downtown
-downwards
-dozen
-dr
-draft
-drama
-dramatic
-drank
-draw
-drawing
-drawings
-drawn
-dream
-dreamed
-dreams
-dress
-dressed
-dressing
-drew
-dried
-drill
-drink
-drinking
-drinks
-drive
-driven
-driver
-drivers
-driving
-drop
-dropped
-drove
-drs
-drug
-drugs
-drunk
-dry
-drying
-duck
+dont
 due
-dull
 during
-dust
-duties
-duty
-dying
-dynamic
 e
+e.g
 each
-eager
-ear
-earlier
-earliest
-early
-earnings
-ears
-earth
-ease
-easier
-easily
-east
-easy
-eat
-eating
-ecumenical
-edge
-edges
-edition
-editor
-editorial
-edt
-edu
 effect
-effective
-effectively
-effectiveness
-effects
-efficiency
-efficient
-effort
-efforts
 eg
-egg
-eggs
 eight
-eighteenth
-eighth
 eighty
 either
-elaborate
-elected
-electric
-electrical
-electricity
-electron
-electronic
-electronics
-element
-elements
 eleven
-eliminate
-eliminated
 else
 elsewhere
-em
-email
-emerged
-emergency
-emission
-emphasis
-emphasize
-empirical
-employed
-employee
-employees
-employment
-empty
-enable
-encounter
-encountered
-encourage
-encouraged
-encouraging
-end
-ended
-ending
-endless
-ends
-enemies
-enemy
-energy
-enforced
-enforcement
-engaged
-engagement
-engine
-engineer
-engineering
-engineers
-enjoy
-enjoyed
-enjoyment
-enormous
 enough
-ens
-enter
-entered
-entering
-enterprise
-entertainment
-enthusiasm
-enthusiastic
-entire
 entirely
-entitled
-entrance
-entries
-entry
-envelope
-environment
-equal
-equally
-equate
-equation
-equipment
-equipped
-equivalent
-era
-error
-errors
-escape
-esp
 especially
-essential
-essentially
-est
-establish
-established
-establishing
-establishment
-estate
-estimate
-estimated
-estimates
 et
+et-al
 etc
-etc.
-eternal
-ethical
-ethics
-evaluation
-eve
 even
-evening
 evenly
-event
-events
-eventually
 ever
 evermore
 every
@@ -1401,2775 +323,449 @@ everybody
 everyone
 everything
 everywhere
-evidence
-evident
-evidently
-evil
-ex
-exact
 exactly
-examination
-examine
-examined
 example
-examples
-excellent
 except
-exception
-exceptions
-excess
-excessive
-exchange
-excite
-excited
-excitement
-exciting
-exclusive
-exclusively
-excuse
-executive
-exercise
-exercises
-exhibit
-exhibition
-exist
-existed
-existence
-existing
-exists
-expanded
-expanding
-expansion
-expect
-expectations
-expected
-expects
-expenditures
-expense
-expenses
-expensive
-experience
-experienced
-experiences
-experiment
-experimental
-experiments
-expert
-experts
-explain
-explained
-explains
-explanation
-explicit
-exploration
-exposed
-exposure
-express
-expressed
-expressing
-expression
-extend
-extended
-extending
-extension
-extensive
-extent
-extra
-extraordinary
-extreme
-extremely
-eye
-eyes
 f
-fabrics
-face
-faced
-faces
-facilities
-facing
-fact
-factor
-factories
-factors
-factory
-facts
-faculty
-fail
-failed
-failure
-faint
-fair
-fairly
-faith
-fall
-fallen
-falling
-fallout
-familiar
-family
-famous
-fans
-fantastic
-far
-farm
-farmer
-farmers
-farther
-fascinating
-fashion
-fast
-fat
-fate
-father
-fathers
-fault
-favor
-favorable
-favorite
-fear
-fears
-feature
-features
-feb
-february
-fed
-federal
-feed
-feel
-feeling
-feelings
-feels
-fees
-feet
-fell
-fellow
-felt
-female
-fence
-festival
-few
-fewer
-fiber
-fibers
-fiction
-field
-fields
 fifteen
 fifth
 fifty
-fig
-fight
-fighting
-figure
-figured
-figures
-file
-filed
-filing
-fill
-filled
-filling
-film
-films
-final
-finally
-finance
-financial
-financing
 find
-finding
-findings
-finds
-fine
-finger
-fingers
-finish
-finished
-fire
-fired
-firing
-firm
-firmly
-firms
 first
-fiscal
-fish
-fishing
-fist
-fit
-fitted
 five
-fixed
-flagicon
-flash
-flat
-fled
-flesh
-flew
-flexible
-flight
-floor
-flow
-flower
-flowers
-fluid
-flux
-fly
-flying
-foam
-foams
-focus
-fog
-folk
-folks
-follow
-followed
-following
-follows
-font
-food
-foods
-fool
-foot
-football
 for
-force
-forced
-forces
-foreign
-forest
-forests
 forever
-forget
-forgive
-form
-formal
-formation
-formed
-former
-formerly
-forming
-forms
-formula
-formulas
-fort
 forth
-fortune
-forum
+forty
 forward
-fought
-found
-founded
 four
-fourteen
-fourth
-fraction
-fractions
-frame
-frames
-free
-freight
-frequencies
-frequency
-frequent
-frequently
-fresh
-friday
-friend
-friendly
-friends
-friendship
-frightened
 from
-front
-frozen
-fruit
-ft.
 full
-full-time
 fully
-fun
-function
-functional
-functions
-fund
-funds
-furnish
-furnished
-furniture
 further
 furthered
 furthering
 furthermore
 furthers
-future
 g
-gain
-gained
-gains
-game
-games
-gang
-garage
-garden
-gardens
-gas
-gather
-gathered
-gathering
 gave
-gay
-gear
-gen
 general
 generally
-generation
-generations
-generous
-genius
-gentle
-gentleman
-gentlemen
-gently
-genuine
-gesture
 get
 gets
 getting
-giant
-gift
-gin
-girl
-girls
 give
 given
 gives
 giving
-glad
-glance
-glanced
-glass
-glasses
-glory
+gmt
 go
-goal
-goals
 goes
 going
-gold
-golden
-golf
 gone
-good
-goods
-gorton
 got
 gotten
-gov
-govern
-governing
-government
-governmental
-governments
-governor
-grabbed
-grade
-grades
-gradually
-graduate
-grain
-grains
-grand
-grant
-granted
-grants
-grass
-grateful
-grave
-gray
-great
-greater
-greatest
-greatly
-green
-greeted
-greetings
-grew
-grinned
-grip
-gross
-ground
-grounds
-group
-grouped
-grouping
-groups
-grow
-growing
-grown
-grows
-growth
-guards
-guess
-guest
-guests
-guidance
-guide
-guided
-guilt
-guilty
-gun
-guns
-guy
-guys
-gyro
 h
-habit
-habits
 had
 hadn't
-hair
+hadnt
 half
-halign
-hall
-ham
-hand
-handed
-handle
-handled
-handling
-hands
-hang
-hanging
-happen
-happened
-happening
 happens
-happily
-happiness
-happy
-hard
-harder
 hardly
-harm
-harmony
 has
 hasn
 hasn't
-hat
-hate
-hated
-hatred
+hasnt
 have
+haven
 haven't
+havent
 having
 he
 he'd
 he'll
 he's
-head
-headed
-heading
-headquarters
-heads
-health
-healthy
-hear
-heard
-hearing
-heart
-hearts
-heat
-heaven
-heavily
-heavy
-heels
-height
-heights
-held
-hell
+hed
 hello
-help
-helped
-helpful
-helping
-helpless
-helps
-hen
 hence
 her
-herd
 here
 here's
 hereafter
 hereby
 herein
+heres
 hereupon
-heritage
-hero
-heroic
 hers
+herse
+herse"
 herself
-hesitated
+herse”
+hes
 hi
-hidden
-hide
-high
-higher
-highest
-highly
-hill
 him
+himse
+himse"
 himself
-hired
+himse”
 his
-historian
-historians
-historic
-historical
-history
-hit
 hither
-hits
-hold
-holder
-holding
-holds
-hole
-holes
-hollywood
 home
-homes
-hon
-honest
-honey
-honor
-honored
-hope
-hoped
+homepage
 hopefully
-hopes
-hoping
-horizon
-horse
-horses
-hosp
-host
-hot
-hotels
-hour
-hours
-house
-household
-houses
-housing
 how
+how'd
+how'll
 how's
 howbeit
 however
-hr
-href
+htm
 html
 http
-huge
-human
-humanity
 hundred
-hundreds
-hung
-hungry
-hunt
-hunting
-hurried
-hurry
-husband
-hydrogen
-hypothalamic
 i
-I
 i'd
 i'll
 i'm
 i've
-ice
-idea
-ideal
-ideas
-identical
-identification
-identified
-identify
-identity
-ideological
+i.e
+i.e.
 ie
 if
-ignored
-illness
-illusion
-illustration
-image
-images
-imagination
-imagine
-imagined
-imitation
-immediate
-immediately
-impact
-implications
-importance
-important
-imposed
-impossible
-impressed
-impression
-impressions
-impressive
-improve
-improved
-improvement
-improvements
-impulse
+ill
 in
-inadequate
 inasmuch
 inc
 inc.
-inch
-inches
-incident
-inclined
-include
-included
-includes
-including
-income
-increase
-increased
-increases
-increasing
-increasingly
-incredible
 indeed
-index
 indicate
 indicated
 indicates
-indication
-indirect
-individual
-individuals
-industrial
-industry
-inevitable
-inevitably
-influence
-information
-informed
-inherent
-initial
-initiative
-injured
-injury
-inner
-innocence
-innocent
-input
-insect
-insects
-inside
-insight
-insist
-insisted
 insofar
-insp
-inspection
-inspired
-installed
-instance
-instances
-instant
 instead
-institution
-institutions
-instruction
-instructions
-instrument
-instruments
-insurance
-insure
-integration
-intended
-intense
-intensity
-intention
-intentions
-interest
-interested
-interesting
-interests
-interference
-interior
-internal
-international
-interpretation
-interpreted
-intervals
-intervention
-interview
-interviews
-intimate
 into
-introduced
-introduction
-invariably
-invent
-invention
-inventory
-investigation
-investigations
-investment
-invited
-involve
-involved
-involves
-involving
-inward
-iron
 is
-island
 isn
 isn't
-isolated
-issue
-issued
-issues
+isnt
 it
 it'd
 it'll
 it's
-item
-items
+itd
+itll
 its
+itse"
 itself
+itse”
+ive
 j
-jacket
-jail
-jan
-january
-jazz
-jet
-job
-jobs
 join
-joined
-joint
-joke
-journey
-joy
-jr
-judges
-judgment
-judgments
-jul
-july
-jump
-jumped
-jun
-june
-jungle
-junior
-juniors
 just
-justice
-justified
-justify
 k
 keep
-keeping
 keeps
 kept
-key
-keys
-kid
-kids
-kill
-killed
-killer
-killing
 kind
-kinds
-king
-kingdom
-kitchen
-knee
-knees
 knew
-knife
-knocked
 know
-knowing
-knowledge
 known
 knows
 l
-la
-label
-laboratory
-labour
-lack
-lacked
-lacking
-ladder
-ladies
-lady
-laid
-lake
-land
-landing
-lands
-landscape
-lang
-language
-languages
-large
 largely
-larger
-largest
 last
-late
 lately
 later
-latest
 latter
 latterly
-laugh
-laughed
-laughing
-laughter
-launched
-law
-laws
-lawyer
-lawyers
-lay
-lb.
-lead
-leaders
-leadership
-leading
-leads
-lean
-leaned
-leaped
-learn
-learned
-learning
-least
-leather
-leave
-leaves
-leaving
-led
-left
-leg
-legal
-legend
-legislation
-legislative
-legislators
-legs
-length
-lengths
 less
-lesson
 lest
 let
 let's
 lets
-letter
-letters
-letting
-level
-levels
-liberal
-liberty
-libraries
-library
-license
-lid
-lie
-lies
-lieutenant
-life
-lift
-lifted
-light
-lighted
-lighting
-lightly
-lights
 like
 liked
 likely
-likes
 likewise
-limit
-limitations
-limited
-limits
 line
-linear
-lines
-link
-link-en
-lips
-liquid
-liquor
-list
-listed
-listen
-listened
-listeners
-listening
-lists
-literally
-literary
-literature
-little
-live
-lived
-lively
-lives
-livestock
-living
-lo
-load
-loaded
-loan
-loans
-lobby
-local
-locate
-located
-location
-lock
-locked
-locking
-log
-logical
-lone
-lonely
-long
-long-range
-long-term
-longer
-longest
 look
-looked
 looking
 looks
-loop
-loose
-lose
-losing
-loss
-losses
-lost
-lot
-lots
-loud
-love
-low
-lower
-lowered
-loyalty
-lt
 ltd
-luck
-lucky
-lumber
-lunch
-luncheon
-lungs
-luxury
-lying
 m
-ma
-machine
-machinery
-machines
-mad
 made
-magazine
-magazines
-magic
-magnet
-magnetic
-magnificent
-magnitude
-maid
-mail
-main
 mainly
-maintain
-maintained
-maintaining
-maintenance
-maj
-major
-majority
 make
-makers
 makes
 making
-male
-males
-man
-manage
-managed
-management
-manager
-managers
-mankind
-manner
-manufacturer
-manufacturers
-manufacturing
 many
-map
-mar
-marble
-march
-marginal
-mark
-marked
-market
-marketing
-markets
-marks
-marriage
-marriages
-married
-marshall
-mass
-masses
-massive
-master
-match
-matching
-mate
-material
-materials
-mathematical
-mathematics
-matter
-matters
-mature
-maturity
-maximum
 may
 maybe
 mayn't
-mdt
+maynt
 me
-meal
-meals
 mean
-meaning
-meaningful
-meanings
 means
-meant
 meantime
 meanwhile
-measure
-measured
-measurement
-measures
-measuring
-meat
-mechanical
-mechanism
-medical
-medicine
-medium
-meet
-meeting
-meetings
-meets
-melody
-melting
 member
 members
-membership
-memory
 men
-mental
-mention
-mentioned
-merchant
-merchants
-mere
 merely
-merger
-merit
-mess
-message
-messrs
-met
-meta
-metal
-method
-methods
-mg
-middle
-middle-class
-midnight
 might
+might've
 mightn't
-mighty
-mile
-miles
-military
-milk
-milligrams
-million
-millions
-mind
-minds
+mightnt
 mine
-mines
-minimal
-minimum
-minor
-minority
 minus
-minute
-minutes
-mirror
 miss
-missed
-missile
-missing
-mission
-mistake
-mix
-mixed
-mixture
-mlle
-mm
-mme
-mobile
-mode
-model
-moderate
-modern
-modest
-mold
-molecule
-moment
-monday
-money
-month
-monthly
-months
-monument
-mood
-moon
-moral
-morality
 more
 moreover
-morning
 most
 mostly
-mother
-mothers
-motion
-motive
-motives
-motor
-mount
-mountain
-mounted
-mouth
 move
-moved
-movement
-movements
-moves
-movie
-movies
-moving
 mr
 mrs
-ms
-msgr
-mss
-mst
 much
-mud
-multiple
-multiply
-municipal
-murder
-muscle
-muscles
-music
-musical
-musician
-musicians
 must
-mustard
+must've
 mustn't
-mutual
+mustnt
 my
+myse"
 myself
-mysterious
-mystery
-myth
+myse”
 n
-naked
 name
-named
 namely
-names
-narrative
-narrow
-nation
-national
-natural
-naturally
-nature
-naval
-nd
+nay
 near
-nearby
-nearest
 nearly
-neat
 necessarily
 necessary
-necessity
-neck
-need
-needed
-needing
 needn't
-needs
-negative
-negotiations
-neighbor
-neighborhood
-neighboring
-neighbors
+neednt
 neither
-nerves
-nervous
-nest
-net
-network
-neutral
 never
 neverf
 neverless
 nevertheless
-new
-newer
-newest
-newly
-news
-newspaper
-newspapers
 next
-nice
-night
-nights
 nine
-nineteenth
 ninety
-ninth
 no
 no-one
-noble
 nobody
-nodded
-noise
 non
 none
 nonetheless
-noon
 noone
 nor
-normal
-normally
-norms
-north
-nose
+nos
 not
-notable
-note
 noted
-notes
 nothing
-notice
-noticed
-notion
 notwithstanding
-noun
-nov
-novel
-novels
-november
 now
 nowhere
-nude
+null
 number
 numbers
-numeral
-numerous
-nuts
 o
-object
-objective
-objectives
-objects
-obligations
-obliged
-observation
-observations
-observe
-observed
-observers
 obtain
 obtained
-obvious
 obviously
-occasion
-occasional
-occasionally
-occasions
-occupation
-occupied
-occur
-occurred
-occurrence
-occurring
-occurs
-ocean
-oct
-october
-odd
 of
 off
-offer
-offered
-offering
-offers
-office
-officer
-officers
-offices
-official
-officials
 often
 oh
-oil
 ok
 okay
-okt
-old
-older
-oldest
 on
 once
 one
 one's
 ones
 only
-onset
 onto
-op
-open
-opened
-opening
-openly
 opens
-operate
-operated
-operating
-operation
-operational
-operations
-operator
-opportunities
-opportunity
-opposed
-opposite
-opposition
-optimal
 or
-oral
-orchestra
-ord
-order
-ordered
-ordering
-orderly
-orders
-organ
-organic
-organization
-organizations
-organized
-origin
-original
-originally
 other
 others
 otherwise
 ought
 oughtn't
+oughtnt
 our
 ours
 ourselves
 out
-outcome
-outdoor
-outlook
-output
-outside
-outstanding
 over
-over-all
 overall
-overcome
-overseas
-overwhelming
+owing
 own
-owned
-owner
-owners
-ownership
-oxidation
-oxygen
 p
-pace
-pack
-package
-packed
-page
-pages
-paid
-pain
-painful
-paint
-painted
-painter
-painting
-paintings
-pair
-pale
-panel
-panels
-panic
-paper
-papers
-parade
-paragraph
-parallel
-parent
-parents
-parked
-parking
-parks
 part
-part-time
 parted
-partially
-participate
-participation
-particle
-particles
 particular
 particularly
-parties
 parting
-partisan
-partly
-partner
 parts
-party
-pass
-passage
-passages
-passed
-passenger
-passengers
-passes
-passing
-passion
 past
-patent
-path
-pathology
-patience
-patient
-patients
-patrol
-pattern
-patterns
-pause
-paused
-pay
-paying
-payment
-payments
-pdt
-peace
-peaceful
-peas
-peculiar
-peered
-pencil
-penny
-people
-peoples
 per
-percent
-percentage
-perception
-perfect
-perfectly
-perform
-performance
-performances
-performed
 perhaps
-period
-periods
-permanent
-permission
-permit
-permits
-permitted
-person
-personal
-personality
-personally
-personnel
-persons
-perspective
-persuaded
-pertinent
-petitioner
-pfc
-ph
-ph.d
-phase
-phases
-phd
-phenomena
-phenomenon
-philosophical
-philosophy
-phone
-phrase
-physical
-physically
-physics
-piano
-pick
-picked
-picture
-pictures
-piece
-pieces
-pile
-pilot
-pink
-pioneer
-pipe
-pistol
-pitch
 place
 placed
 places
-placing
-plain
-plan
-plane
-planes
-planet
-planetary
-planets
-planned
-planning
-plans
-plant
-plants
-plaster
-plastic
-plastics
-plate
-plates
-platform
-play
-played
-player
-players
-playing
-plays
-pleasant
 please
-pleased
-pleasure
-plenty
-plot
-plug
-plural
 plus
-pm
-pocket
-poem
-poems
-poet
-poetic
-poetry
-poets
 point
 pointed
 pointing
 points
-police
-policeman
-policies
-policy
-political
-politicians
-politics
-polynomial
-pond
-pool
-poor
-popular
-populate
-population
-porch
-port
-portion
-pose
-position
-positions
-positive
-possessed
-possession
-possibilities
-possibility
 possible
 possibly
-post
-posted
-posts
-pot
-potential
-pound
-pounds
-poured
-poverty
-powder
-power
-powerful
-powers
-pp
-practical
-practically
-practice
-practices
-preceding
-precious
-precise
-precisely
-precision
-prefer
-preferred
-preliminary
-preparation
-prepare
-prepared
-preparing
-presence
+potentially
+predominantly
 present
-presentation
 presented
 presenting
 presents
-preserve
-president
-press
-pressed
-pressing
-pressure
-pressures
-prestige
 presumably
-pretty
-prevent
-prevented
-prevention
-previous
 previously
-price
-prices
-pride
 primarily
-primary
-prime
-primitive
-principal
-principle
-principles
-print
-printed
-prior
-prison
-prisoners
-private
-prize
-probabilities
-probability
-probable
 probably
-problem
-problems
-procedure
-procedures
-proceeded
-process
-processes
-processing
-procurement
-produce
-produced
-producing
-product
-production
-productive
-products
-prof
-profession
-professional
-profit
-profits
-profound
-program
-programs
-progress
-project
-projects
-prominent
-promise
-promised
-promises
-promising
-promote
-promotion
 promptly
-proof
-propaganda
-proper
-properly
-properties
-property
-proportion
-proposal
-proposals
-proposed
-prospect
-prospective
-prospects
-protect
-protected
-protection
-protein
-protest
-proud
-prove
-proved
-provide
 provided
 provides
-providing
-provision
-provisions
-pst
-psychological
-public
-publication
-publicity
-publicly
-published
-publisher
-pull
-pulled
-pulling
-pulmonary
-punishment
-pupil
-pupils
-purchase
-purchased
-pure
-purely
-purpose
-purposes
-pursuant
-pursue
-push
-pushed
 put
 puts
-putting
-pvt
 q
-qualified
-qualities
-quality
-quantity
-quarrel
-quart
-quarter
-quarters
 que
-question
-questioned
-questioning
-questionnaire
-questions
-quick
-quickly
-quiet
-quietly
 quite
-quoted
-quotient
-qv
 r
-race
-races
-racial
-racing
-radar
-radiation
-radio
-rail
-railroad
-rain
-raise
-raised
-raising
 ran
-ranch
-rang
-range
-ranging
-rank
-ranks
-rapid
-rapidly
-rare
-rarely
-rate
-rates
 rather
-ratio
-rational
-raw
-rd
-re
-reach
-reached
-reaches
-reaching
-reaction
-reactionary
-reactions
-read
-reader
-readers
 readily
-reading
-ready
-real
-realism
-realistic
-reality
-realization
-realize
-realized
 really
-rear
-reason
-reasonable
 reasonably
-reasons
-recall
-recalled
-receive
-received
-receives
-receiving
 recent
 recently
-reception
-recognize
-recognized
-recommend
-recommendation
-recommendations
-recommended
-record
-recorded
-recording
-records
-recovery
-recreation
-rector
-red
-reduce
-reduced
-reducing
-reduction
-ref
-refer
-reference
-referred
-reflect
-reflected
-reflection
-reflects
-reform
-refrigerator
-refund
-refused
-regard
-regarded
 regarding
 regardless
 regards
-regime
-regiment
-region
-regional
-regions
-register
-registered
-registration
-regular
-regularly
-regulations
-rehabilitation
-rejected
 related
-relating
-relation
-relations
-relationship
-relationships
-relative
 relatively
-relatives
-release
-released
-relevant
-reliable
-relief
-relieved
-religion
-religious
-remain
-remainder
-remained
-remaining
-remains
-remark
-remarkable
-remarked
-remarks
-remember
-remembered
-reminded
-remote
-removal
-remove
-removed
-rendered
-rent
-reorganization
-rep
-repair
-repeat
-repeated
-replace
-replaced
-replacement
-replied
-reply
-report
-reported
-reporter
-reporters
-reports
-represent
-representative
-representatives
-represented
-representing
-represents
-reprint
-reps
-republic
-reputation
-request
-require
-required
-requirement
-requirements
-requires
-res
-research
-reserve
-reserved
-residence
-residential
-residents
-resist
-resistance
-resolution
-resolved
-resources
-respect
-respectable
-respective
 respectively
-respects
-respond
-responded
-response
-responses
-responsibilities
-responsibility
-responsible
-rest
-restaurant
-restrictions
-result
-resulted
-resulting
-results
-resumed
-retained
-retired
-retirement
-return
-returned
-returning
-returns
-rev
-reveal
-revealed
-reveals
-revenues
-review
-revolution
-revolutionary
-rhythm
-rich
-rid
-ride
-riding
-rifle
-rifles
-right
-rights
-rigid
-ring
-rise
-rises
-rising
-risk
-ritual
-river
-road
-roads
-rock
-rocks
-rode
-role
-roles
-roll
-rolled
-romantic
-roof
-room
-rooms
-root
-roots
-rope
-rose
-rough
-roughly
-round
-route
-routine
-row
-rt
-rub
-rule
-ruled
-rules
-ruling
 run
-running
-runs
-rural
-rush
-rushed
 s
-sacred
-sacrifice
-sad
-saddle
-safe
-safety
 said
-sail
-sailing
-sake
-salary
-sale
-sales
-saline
-salt
 same
-sample
-sampling
-sand
-sang
-sat
-satisfaction
-satisfactory
-satisfied
-saturday
-sauce
-save
-saved
-saving
-savings
 saw
 say
 saying
 says
-scale
-scarcely
-scared
-scattered
-scene
-scenes
-schedule
-scheduled
-scheme
-scholars
-scholarship
-school
-schools
-science
-scope
-score
-screen
-sea
-search
-searching
-season
-seat
-seated
 second
-secondary
 secondly
 seconds
-secret
-secretary
-secrets
 section
-sections
-secure
-security
 see
-seed
-seeds
 seeing
-seek
-seeking
 seem
 seemed
 seeming
 seems
 seen
 sees
-segment
-seized
-seldom
-select
-selected
-selection
 self
-sell
-selling
 selves
-sen
-senator
-send
-sending
-senior
-sens
-sense
-sensible
-sensitive
-sensitivity
 sent
-sentence
-sentiment
-sep
-separate
-separated
-sept
-september
-sequence
-sergeant
-series
-serious
-seriously
-servants
-serve
-served
-serves
-service
-services
-serving
-session
-sessions
-set
-sets
-setting
-settle
-settled
-settlement
 seven
-seventh
+seventy
 several
-severe
-sewage
-sex
-sexual
-sfc
-sgt
-shade
-shadows
-shaking
 shall
-shame
 shan't
-shape
-shapes
-share
-shared
-shares
-sharing
-sharp
-sharply
+shant
 she
 she'd
 she'll
 she's
-shear
-sheep
-sheet
+shed
 shell
-shelter
-shelters
-shift
-shine
-shining
-ship
-shipping
-ships
-shirt
-shock
-shoe
-shoes
-shook
-shoot
-shooting
-shop
-shopping
-shore
-short
-shortly
-shorts
-shot
-shots
+shes
 should
-shoulder
-shoulders
+should've
 shouldn
 shouldn't
-shout
-shouted
-shouting
+shouldnt
 show
 showed
 showing
 shown
+showns
 shows
-shut
-sick
 side
 sides
-sidewalk
-sighed
-sight
-sign
-signal
-signals
-signed
-significance
 significant
-signs
-silence
-silent
-silver
+significantly
 similar
 similarly
-simple
-simply
-sin
 since
-sing
-singing
-single
-sink
-sister
-sit
 site
-sitter
-sitting
-situation
-situations
 six
-sixteen
-sixties
 sixty
-size
-skill
-skilled
-skills
-skin
-skirt
-sky
-skywave
-slave
-sleep
-slender
-slept
-slid
-slide
-slight
 slightly
-slim
-slip
-slipped
-slow
-slowly
-small
-smaller
-smallest
-smart
-smell
-smile
-smiled
-smoke
-smooth
-snake
-snakes
-snapped
-snow
-so
-so-called
-soap
-social
-socialism
-societies
-society
-soft
-softly
-soil
-sold
-soldier
-solely
-solid
-solution
-solve
-solved
 some
 somebody
 someday
 somehow
 someone
+somethan
 something
 sometime
 sometimes
 somewhat
 somewhere
-son
-song
-songs
 soon
-sophisticated
-sorry
-sort
-sought
-soul
-souls
-sound
-sounded
-sounds
-source
-sources
-south
-sovereign
-sovereignty
-space
-span
-spare
-speak
-speaker
-speaking
-special
-specialists
-species
-specific
 specifically
 specified
 specify
 specifying
-specimen
-spectacular
-speech
-speeches
-speed
-spell
-spend
-spending
-spent
-sphere
-spirit
-spirits
-spiritual
-spite
-splendid
-spoke
-spoken
-sponsor
-sponsored
-spot
-spots
-spread
-spring
-square
-sr
-st
-stable
-staff
-stage
-stages
-staining
-stairs
-stake
-stand
-standard
-standards
-standing
-stands
-star
-stared
-staring
-stars
-start
-started
-starting
-startled
-starts
 state
-stated
-statement
-statements
 states
-station
-stations
-statistics
-status
-stay
-stayed
-stead
-steadily
-steady
-steam
-steel
-stem
-stems
-step
-stepped
-steps
-stick
-sticks
-stiff
 still
-stock
-stockholders
-stomach
-stone
-stood
 stop
-stopped
-storage
-store
-stored
-stores
-stories
-storm
-story
-straight
-strain
-strange
-stranger
-strategic
-strategy
-stream
-street
-streets
-stress
-stressed
-stresses
-stretch
-stretched
-strictly
-strike
-strikes
-striking
-string
-strip
-stroke
-strong
-stronger
-strongest
 strongly
-struck
-structural
-structure
-structures
-struggle
-struggling
-stuck
-student
-studio
-study
-stuff
-stumbled
-stupid
-style
-styles
-sub
-subject
-subjected
-subjects
-submitted
-substance
-substances
-substantial
 substantially
-substitute
-substrate
-subtle
-subtract
-suburban
-succeeded
-success
-successes
-successful
 successfully
-succession
 such
-sudden
-suddenly
-suffer
-suffered
-suffering
-sufficient
 sufficiently
-suffix
-sugar
 suggest
-suggested
-suggestion
-suggestions
-suggests
-suit
-suitable
-suitcase
-suite
-suited
-suits
-sum
-summary
-summer
-sun
-sunday
 sup
-supervision
-supper
-supplement
-supplied
-supplies
-supply
-support
-supported
-supporting
-suppose
-supposed
-supt
 sure
-surely
-surface
-surfaces
-surg
-surplus
-surprise
-surprised
-surprising
-surrender
-surrounded
-survey
-survival
-survive
-suspect
-suspected
-suspended
-suspicion
-sweat
-sweet
-swept
-swift
-swim
-swimming
-swing
-switch
-switches
-swung
-syllable
-symbol
-symbolic
-symbols
-sympathetic
-sympathy
-system
-systems
 t
 t's
-table
-tables
-tactics
-tag
-tagged
-tags
-tail
 take
 taken
-takes
 taking
-tale
-talent
-talents
-talk
-talked
-talking
-tall
-tangent
-tangible
-tape
-target
-task
-tasks
-taste
-taught
-teach
-team
-teams
-tears
-technical
-technique
-techniques
-technology
-teeth
-telephone
-television
 tell
-telling
-tells
-temperature
-temperatures
-temporarily
-temporary
 ten
-tend
-tended
-tendency
 tends
-tension
-tent
-term
-terms
-terrible
-test
-tested
-testimony
-testing
-tests
-text
-textile
-th
 than
-thank
-thanks
-thanx
 that
 that'll
 that's
 that've
+thatll
 thats
+thatve
 the
-theater
 their
 theirs
 them
-theme
 themselves
 then
 thence
-theological
-theoretical
-theories
-theory
 there
 there'd
 there'll
@@ -4178,332 +774,96 @@ there's
 there've
 thereafter
 thereby
+thered
 therefore
 therein
+therell
+thereof
+therere
 theres
+thereto
 thereupon
-thermal
+thereve
 these
 they
 they'd
 they'll
 they're
 they've
-thick
-thickness
+theyd
+theyll
+theyre
+theyve
 thin
 thing
 things
-think
-thinking
-thinks
 third
 thirty
 this
 thorough
 thoroughly
 those
+thou
 though
-thought
-thoughts
+thoughh
 thousand
-thousands
-threat
-threatened
-threatening
 three
-threw
-throat
+throug
 through
 throughout
-throw
-thrown
 thru
-thrust
-thursday
 thus
-thyroid
-tie
-tied
-tight
+til
 till
-time
-times
-tiny
 tip
-tire
-tired
-tissue
-title
+tis
 to
-toast
 today
-toes
 together
-told
-tomorrow
-tone
-tones
-tongue
-tonight
-tons
 too
 took
-tool
-tools
-tooth
 top
-torn
-tossed
-total
-totally
-touch
-touched
-tough
-tour
-tournament
 toward
 towards
-town
-towns
-trace
-track
-tractor
-trade
-traders
-trading
-tradition
-traditional
-traditions
-traffic
-tragedy
-tragic
-train
-trained
-training
-transfer
-transferred
-transformation
-transformed
-transition
-transportation
-trap
-travel
-traveled
-treat
-treated
-treatment
-tree
-trees
-trembling
-tremendous
-trend
-trends
-trial
-trials
-triangle
-tribute
 tried
 tries
-trim
-trip
-trips
-triumph
-troops
-trouble
-troubled
-troubles
-truck
-trucks
-true
-truly
-trust
-truth
 try
 trying
-tsunami
-tube
-tubes
-tuesday
-turn
-turned
-turning
-turns
+twas
+twelve
 twenty
-twenty-five
 twice
 two
-type
-types
-typical
 u
-ugly
-ultimate
-un
-unable
-uncertain
-uncle
-unconscious
 under
-underground
-underlying
 underneath
-understand
-understanding
-understood
-undoing
-undoubtedly
-uneasy
-unexpected
-unfortunate
-unfortunately
-unhappy
-uniform
-union
-unions
-unique
-unit
-units
-unity
-universal
-universe
-universities
-unknown
 unless
 unlike
-unlikely
 until
 unto
-unusual
 up
-update
 upon
-upper
-upstairs
-upward
-upwards
-urban
-urge
-urged
-urgent
-url
 us
 use
 used
-useful
-user
-username
 uses
 using
-usual
 usually
-utc
-utility
-utopian
-utterly
 v
-vacation
-vacuum
-valid
-valign
-valley
-valuable
-value
-variable
-variables
-variation
-variations
-varied
-variety
 various
-vary
-varying
-vast
-vehicle
-vehicles
-vein
-velocity
-venture
-verb
-verbal
-verse
-version
 versus
 very
-veteran
 via
-vice
-video
-view
-viewed
-views
-vigorous
-village
-virtually
-virtue
-visible
-vision
-visit
-visited
-visiting
-visitors
-visual
-vital
-vivid
 viz
-vocational
-voice
-voices
-vol
-volume
-volumes
-voluntary
-volunteers
-vote
-voted
-voters
-votes
-voting
-vowel
-vs
 w
-wage
-wages
-wagon
-wait
-waited
-waiting
-wake
-walk
-walked
-walking
-wall
-walls
 want
 wanted
 wanting
 wants
-war
-ward
-warfare
-warm
-warmth
-warned
-warning
-warrant
 was
-wash
-washed
-washing
+wasn
 wasn't
-waste
-watch
-watched
-watching
-water
-waters
-wave
-waves
+wasnt
 way
 ways
 we
@@ -4511,44 +871,38 @@ we'd
 we'll
 we're
 we've
-weak
-weakness
-wear
-wearing
-weather
-web
-wedding
-wednesday
-week
-weekend
-weekly
-weeks
-weight
-welcome
 well
 wells
 went
 were
+weren
 weren't
-west
-wet
+werent
+weve
 what
+what'd
 what'll
 what's
 what've
 whatever
-wheel
-wheels
+whatll
+whats
+whatve
 when
+when'd
+when'll
 when's
 whence
 whenever
 where
+where'd
+where'll
 where's
 whereafter
 whereas
 whereby
 wherein
+wheres
 whereupon
 wherever
 whether
@@ -4556,128 +910,58 @@ which
 whichever
 while
 whilst
-whip
-whisky
-whispered
-white
+whim
 whither
 who
 who'd
 who'll
 who's
+whod
 whoever
 whole
-wholly
+wholl
 whom
 whomever
+whos
 whose
 why
+why'd
+why'll
 why's
-wide
 widely
-widespread
-widow
 width
-wife
-wild
-wildlife
-wildly
 will
-willing
-win
-wind
-window
-windows
-winds
-wine
-wines
-wing
-wings
-winning
-winter
-wiped
-wire
-wisdom
-wish
-wished
-wishes
-wit
 with
 within
 without
-witness
-witnesses
-wives
-woman
-women
-won
 won't
-wonder
-wondered
-wonderful
-wondering
-wood
-wooden
-woods
-word
-words
-wore
-work
-worked
-worker
-workers
-working
-works
-workshop
-world
-worn
-worried
-worries
-worry
-worse
-worst
-worth
-worthy
+wont
 would
+would've
 wouldn
 wouldn't
-wound
-wounded
-write
-writers
-writes
-writing
-written
-wrong
-wrote
+wouldnt
+ws
 www
 x
-xml
 y
-yard
-yards
+ye
 year
-year-old
 years
-yelled
-yellow
 yes
-yesterday
 yet
-yield
-york
 you
 you'd
 you'll
 you're
 you've
-young
-younger
-youngest
-youngsters
+youd
+youll
 your
+youre
 yours
 yourself
 yourselves
+youve
 z
-zero
+zero
\ No newline at end of file
diff --git a/apps/common/src/python/mediawords/languages/es/es_stop_words.txt b/apps/common/src/python/mediawords/languages/es/es_stop_words.txt
index 4f08f76cb8..91e465d8f7 100644
--- a/apps/common/src/python/mediawords/languages/es/es_stop_words.txt
+++ b/apps/common/src/python/mediawords/languages/es/es_stop_words.txt
@@ -1,69 +1,199 @@
-#
-# This is a stop word list for the Spanish language.
-#
 # Sources:
-#     http://search.cpan.org/~creamyg/Lingua-StopWords-0.09/
 #
+#     http://search.cpan.org/~creamyg/Lingua-StopWords-0.09/
+#     https://github.com/stopwords-iso/stopwords-es/blob/master/stopwords-es.txt
+# (Lightly edited to remove words in the original lists that are actually meaningful)
 
 a
 acerca
+actualmente
+adelante
+ademas
 además
 adónde
+afirmó
+agregó
+ahi
+ahí
+ahora
 al
 algo
+algún
+alguna
 algunas
+alguno
 algunos
+alli
+allí
+alrededor
+ambos
+ampleamos
+añadió
+antano
+antaño
 ante
+anterior
 antes
+apenas
+aproximadamente
 aquel
+aquél
 aquella
+aquélla
 aquellas
+aquéllas
+aquello
 aquellos
+aquéllos
+aqui
+aquí
+arriba
+arribaabajo
+aseguró
 aseveró
+asi
 así
+atras
+aun
+aún
 aunque
+ayer
+b
+bajo
+bastante
+bien
+breve
+buen
+buena
+buenas
+bueno
+buenos
+c
 cada
+casi
+cerca
+cierta
+ciertos
+cinco
+claro
+comentó
 como
+cómo
 con
+conmigo
+conocer
+conseguimos
+conseguir
+considera
+consideró
+consigo
+consigue
+consiguen
+consigues
+contigo
 contra
+cosas
+creo
 cual
+cuál
 cuales
+cuáles
+cualquier
 cualquiera
 cuando
-cuál
-cuáles
+cuándo
+cuanta
+cuánta
+cuantas
+cuántas
+cuanto
 cuánto
+cuantos
+cuántos
+cuatro
+cuenta
+d
+da
+dado
+dan
+dar
 de
+debajo
 debe
+deben
+debido
+decir
+dejó
 del
+delante
+demás
+demasiado
+dentro
 desde
+despues
 después
 destacó
+detras
+detrás
+dia
+día
+dias
+días
+dice
+dicen
 dicho
+dieron
+diferente
+diferentes
+dijeron
 dijo
+dio
 donde
+dónde
+dos
 durante
 e
+ejemplo
 el
+él
 ella
 ellas
+ello
 ellos
+empleais
+emplean
+emplear
+empleas
+empleo
 en
+encima
+encuentra
+enfrente
+entonces
 entre
 era
 erais
+eramos
+éramos
 eran
 eras
 eres
 es
 esa
+ésa
 esas
+ésas
 ese
+ése
 eso
 esos
+ésos
 esta
+está
+ésta
 estaba
 estabais
+estábamos
 estaban
 estabas
 estad
@@ -71,154 +201,233 @@ estada
 estadas
 estado
 estados
+estais
+estáis
 estamos
+estan
+están
 estando
 estar
-estaremos
 estará
 estarán
 estarás
 estaré
 estaréis
+estaremos
 estaría
 estaríais
 estaríamos
 estarían
 estarías
 estas
+estás
+éstas
 este
+esté
+éste
+estéis
 estemos
+estén
+estés
 esto
 estos
+éstos
 estoy
 estuve
 estuviera
 estuvierais
+estuviéramos
 estuvieran
 estuvieras
 estuvieron
 estuviese
 estuvieseis
+estuviésemos
 estuviesen
 estuvieses
 estuvimos
 estuviste
 estuvisteis
-estuviéramos
-estuviésemos
 estuvo
-está
-estábamos
-estáis
-están
-estás
-esté
-estéis
-estén
-estés
+ex
 excepto
+existe
+existen
+explicó
 expresó
+f
 fue
 fuera
 fuerais
+fuéramos
 fueran
 fueras
 fueron
 fuese
 fueseis
+fuésemos
 fuesen
 fueses
 fui
 fuimos
 fuiste
 fuisteis
-fuéramos
-fuésemos
+g
+general
+gran
+grandes
+gueno
+h
 ha
+habéis
+haber
+habia
+había
+habíais
+habíamos
+habían
+habías
 habida
 habidas
 habido
 habidos
 habiendo
-habremos
+habla
+hablan
 habrá
 habrán
 habrás
 habré
 habréis
+habremos
 habría
 habríais
 habríamos
 habrían
 habrías
-habéis
-había
-habíais
-habíamos
-habían
-habías
 hace
+haceis
+hacemos
+hacen
 hacer
+hacerlo
+haces
 hacia
 hacía
+haciendo
+hago
 han
 has
 hasta
 hay
 haya
+hayáis
 hayamos
 hayan
 hayas
-hayáis
 he
+hecho
 hemos
 hicieron
 hicimos
+hizo
+horas
+hoy
 hube
 hubiera
 hubierais
+hubiéramos
 hubieran
 hubieras
 hubieron
 hubiese
 hubieseis
+hubiésemos
 hubiesen
 hubieses
 hubimos
 hubiste
 hubisteis
-hubiéramos
-hubiésemos
 hubo
+i
+igual
+incluso
 indicó
+informo
 informó
+intenta
+intentais
+intentamos
+intentan
+intentar
+intentas
+intento
+ir
+j
+junto
+k
+l
 la
 lado
 lados
+largo
 las
 le
+lejos
 les
+llegó
 lleva
+llevar
 lo
 los
 luego
+lugar
+m
+mal
+manera
+manifestó
+mas
+más
+mayor
 me
 mediante
+medio
+mejor
+mencionó
+menos
+menudo
 mi
+mí
+mia
+mía
+mias
+mías
+mientras
+mio
+mío
+mios
+míos
 mis
 misma
+mismas
 mismo
+mismos
+modo
+momento
+mucha
+muchas
 mucho
 muchos
 muy
-más
-mí
-mía
-mías
-mío
-míos
+n
 nada
+nadie
 ni
+ningún
+ninguna
+ningunas
+ninguno
+ningunos
 no
 nos
 nosotras
@@ -227,147 +436,291 @@ nuestra
 nuestras
 nuestro
 nuestros
+nueva
+nuevas
+nuevo
+nuevos
+nunca
 o
 obstante
+ocho
 os
 otra
 otras
 otro
 otros
+p
+pais
+paìs
 para
+parece
 parte
+partir
+pasada
+pasado
+peor
 pero
+pesar
+poca
+pocas
 poco
+pocos
+podeis
+podemos
+poder
+podrá
+podrán
+podria
+podría
+podriais
+podriamos
+podrian
+podrían
+podrias
+poner
 por
+por qué
 porque
 porqué
+posible
+primer
+primera
+primero
+primeros
+principalmente
+pronto
+propia
+propias
+propio
+propios
+proximo
+próximo
+próximos
 pudieron
 pudiese
 pudimos
+pudo
+pueda
 puede
+pueden
+puedo
+pues
+q
+qeu
 que
+qué
+quedó
+queremos
 quien
+quién
 quienes
-qué
+quiénes
+quiere
+quiza
+quizá
+quizas
+quizás
+r
+s
+sabe
+sabeis
+sabemos
+saben
+saber
+sabes
+sal
+salvo
 se
+sé
 sea
+seáis
 seamos
 sean
 seas
+segun
 según
-seremos
+segunda
+segundo
+seis
+señaló
+ser
+sera
 será
 serán
 serás
 seré
 seréis
+seremos
 sería
 seríais
 seríamos
 serían
 serías
-seáis
-señaló
 si
+sí
 sido
+siempre
 siendo
+siete
+sigue
+siguiente
 sin
+sino
 sobre
 sois
-solo
+sola
+solamente
+solas
 solía
+solo
+sólo
+solos
 somos
 son
 soy
+soyos
 su
 suele
+supuesto
 sus
 suya
 suyas
 suyo
 suyos
-sí
-sólo
+t
+tal
+tambien
 también
+tampoco
+tan
 tanto
+tarde
 te
-tendremos
+temprano
 tendrá
 tendrán
 tendrás
 tendré
 tendréis
+tendremos
 tendría
 tendríais
 tendríamos
 tendrían
 tendrías
 tened
+teneis
+tenéis
 tenemos
+tener
 tenga
+tengáis
 tengamos
 tengan
 tengas
 tengo
-tengáis
-tenida
-tenidas
-tenido
-tenidos
-teniendo
-tenéis
 tenía
 teníais
 teníamos
 tenían
 tenías
+tenida
+tenidas
+tenido
+tenidos
+teniendo
+tercera
 ti
+tiempo
 tiene
 tienen
 tienes
 toda
 todas
+todavia
+todavía
 todo
 todos
+total
+trabaja
+trabajais
+trabajamos
+trabajan
+trabajar
+trabajas
+trabajo
 tras
+trata
 través
+tres
 tu
+tú
 tus
 tuve
 tuviera
 tuvierais
+tuviéramos
 tuvieran
 tuvieras
 tuvieron
 tuviese
 tuvieseis
+tuviésemos
 tuviesen
 tuvieses
 tuvimos
 tuviste
 tuvisteis
-tuviéramos
-tuviésemos
 tuvo
 tuya
 tuyas
 tuyo
 tuyos
-tú
+u
+última
+últimas
+ultimo
+último
+últimos
 un
 una
 unas
 uno
 unos
+usa
+usais
+usamos
+usan
+usar
+usas
+uso
+usted
+ustedes
+v
+va
+vais
+valor
+vamos
+van
+varias
+varios
+vaya
+veces
+ver
+verdad
+verdadera
+verdadero
 vez
 vosotras
 vosotros
+voy
 vuestra
 vuestras
 vuestro
 vuestros
+w
+x
 y
 ya
 yo
-él
-éramos
+z
\ No newline at end of file
diff --git a/apps/common/src/python/mediawords/languages/es/es_stop_words_old.txt b/apps/common/src/python/mediawords/languages/es/es_stop_words_old.txt
new file mode 100644
index 0000000000..4f08f76cb8
--- /dev/null
+++ b/apps/common/src/python/mediawords/languages/es/es_stop_words_old.txt
@@ -0,0 +1,373 @@
+#
+# This is a stop word list for the Spanish language.
+#
+# Sources:
+#     http://search.cpan.org/~creamyg/Lingua-StopWords-0.09/
+#
+
+a
+acerca
+además
+adónde
+al
+algo
+algunas
+algunos
+ante
+antes
+aquel
+aquella
+aquellas
+aquellos
+aseveró
+así
+aunque
+cada
+como
+con
+contra
+cual
+cuales
+cualquiera
+cuando
+cuál
+cuáles
+cuánto
+de
+debe
+del
+desde
+después
+destacó
+dicho
+dijo
+donde
+durante
+e
+el
+ella
+ellas
+ellos
+en
+entre
+era
+erais
+eran
+eras
+eres
+es
+esa
+esas
+ese
+eso
+esos
+esta
+estaba
+estabais
+estaban
+estabas
+estad
+estada
+estadas
+estado
+estados
+estamos
+estando
+estar
+estaremos
+estará
+estarán
+estarás
+estaré
+estaréis
+estaría
+estaríais
+estaríamos
+estarían
+estarías
+estas
+este
+estemos
+esto
+estos
+estoy
+estuve
+estuviera
+estuvierais
+estuvieran
+estuvieras
+estuvieron
+estuviese
+estuvieseis
+estuviesen
+estuvieses
+estuvimos
+estuviste
+estuvisteis
+estuviéramos
+estuviésemos
+estuvo
+está
+estábamos
+estáis
+están
+estás
+esté
+estéis
+estén
+estés
+excepto
+expresó
+fue
+fuera
+fuerais
+fueran
+fueras
+fueron
+fuese
+fueseis
+fuesen
+fueses
+fui
+fuimos
+fuiste
+fuisteis
+fuéramos
+fuésemos
+ha
+habida
+habidas
+habido
+habidos
+habiendo
+habremos
+habrá
+habrán
+habrás
+habré
+habréis
+habría
+habríais
+habríamos
+habrían
+habrías
+habéis
+había
+habíais
+habíamos
+habían
+habías
+hace
+hacer
+hacia
+hacía
+han
+has
+hasta
+hay
+haya
+hayamos
+hayan
+hayas
+hayáis
+he
+hemos
+hicieron
+hicimos
+hube
+hubiera
+hubierais
+hubieran
+hubieras
+hubieron
+hubiese
+hubieseis
+hubiesen
+hubieses
+hubimos
+hubiste
+hubisteis
+hubiéramos
+hubiésemos
+hubo
+indicó
+informó
+la
+lado
+lados
+las
+le
+les
+lleva
+lo
+los
+luego
+me
+mediante
+mi
+mis
+misma
+mismo
+mucho
+muchos
+muy
+más
+mí
+mía
+mías
+mío
+míos
+nada
+ni
+no
+nos
+nosotras
+nosotros
+nuestra
+nuestras
+nuestro
+nuestros
+o
+obstante
+os
+otra
+otras
+otro
+otros
+para
+parte
+pero
+poco
+por
+porque
+porqué
+pudieron
+pudiese
+pudimos
+puede
+que
+quien
+quienes
+qué
+se
+sea
+seamos
+sean
+seas
+según
+seremos
+será
+serán
+serás
+seré
+seréis
+sería
+seríais
+seríamos
+serían
+serías
+seáis
+señaló
+si
+sido
+siendo
+sin
+sobre
+sois
+solo
+solía
+somos
+son
+soy
+su
+suele
+sus
+suya
+suyas
+suyo
+suyos
+sí
+sólo
+también
+tanto
+te
+tendremos
+tendrá
+tendrán
+tendrás
+tendré
+tendréis
+tendría
+tendríais
+tendríamos
+tendrían
+tendrías
+tened
+tenemos
+tenga
+tengamos
+tengan
+tengas
+tengo
+tengáis
+tenida
+tenidas
+tenido
+tenidos
+teniendo
+tenéis
+tenía
+teníais
+teníamos
+tenían
+tenías
+ti
+tiene
+tienen
+tienes
+toda
+todas
+todo
+todos
+tras
+través
+tu
+tus
+tuve
+tuviera
+tuvierais
+tuvieran
+tuvieras
+tuvieron
+tuviese
+tuvieseis
+tuviesen
+tuvieses
+tuvimos
+tuviste
+tuvisteis
+tuviéramos
+tuviésemos
+tuvo
+tuya
+tuyas
+tuyo
+tuyos
+tú
+un
+una
+unas
+uno
+unos
+vez
+vosotras
+vosotros
+vuestra
+vuestras
+vuestro
+vuestros
+y
+ya
+yo
+él
+éramos
diff --git a/apps/common/src/python/mediawords/languages/fi/fi_stop_words.txt b/apps/common/src/python/mediawords/languages/fi/fi_stop_words.txt
index aa2cb4cdf7..d1457203fe 100644
--- a/apps/common/src/python/mediawords/languages/fi/fi_stop_words.txt
+++ b/apps/common/src/python/mediawords/languages/fi/fi_stop_words.txt
@@ -1,18 +1,158 @@
-#
-# This is a stop word list for the Finnish language.
-#
 # Sources:
-#     http://search.cpan.org/~creamyg/Lingua-StopWords-0.09/
 #
+#     http://search.cpan.org/~creamyg/Lingua-StopWords-0.09/
+#     https://github.com/stopwords-iso/stopwords-fi/blob/master/stopwords-fi.txt
+# (Lightly edited to remove words in the original lists that are actually meaningful)
 
+aiemmin
+aika
+aikaa
+aikaan
+aikaisemmin
+aikaisin
+aikajen
+aikana
+aikoina
+aikoo
+aikovat
+aina
+ainakaan
+ainakin
+ainoa
+ainoat
+aiomme
+aion
+aiotte
+aist
+aivan
+ajan
+alas
+alemmas
+alkuisin
+alkuun
+alla
+alle
+aloitamme
+aloitan
+aloitat
+aloitatte
+aloitattivat
+aloitettava
+aloitettevaksi
+aloitettu
+aloitimme
+aloitin
+aloitit
+aloititte
+aloittaa
+aloittamatta
+aloitti
+aloittivat
+alta
+aluksi
+alussa
+alusta
+annettavaksi
+annetteva
+annettu
+ansiosta
+antaa
+antamatta
+antoi
+aoua
+apu
+asia
+asiaa
+asian
+asiasta
+asiat
+asioiden
+asioihin
+asioita
+asti
+avuksi
+avulla
+avun
+avutta
+edelle
+edelleen
+edellä
+edeltä
+edemmäs
+edes
+edessä
+edestä
+ehkä
 ei
+eikä
+eilen
 eivät
+eli
+ellei
+elleivät
+ellemme
+ellen
+ellet
+ellette
 emme
 en
+enemmän
+eniten
+ennen
+ensi
+ensimmäinen
+ensimmäiseksi
+ensimmäisen
+ensimmäisenä
+ensimmäiset
+ensimmäisiksi
+ensimmäisinä
+ensimmäisiä
+ensimmäistä
+ensin
+entinen
+entisen
+entisiä
+entisten
+entistä
+enää
+eri
+erittäin
+erityisesti
+eräiden
+eräs
+eräät
+esi
+esiin
+esillä
+esimerkiksi
 et
+eteen
+etenkin
+etessa
 ette
+ettei
 että
+haikki
+halua
+haluaa
+haluamatta
+haluamme
+haluan
+haluat
+haluatte
+haluavat
+halunnut
+halusi
+halusimme
+halusin
+halusit
+halusitte
+halusivat
+halutessa
+haluton
 he
+hei
 heidän
 heidät
 heihin
@@ -22,6 +162,27 @@ heiltä
 heissä
 heistä
 heitä
+helposti
+heti
+hetkellä
+hieman
+hitaasti
+hoikein
+huolimatta
+huomenna
+hyvien
+hyviin
+hyviksi
+hyville
+hyviltä
+hyvin
+hyvinä
+hyvissä
+hyvistä
+hyviä
+hyvä
+hyvät
+hyvää
 hän
 häneen
 hänelle
@@ -32,8 +193,13 @@ hänessä
 hänestä
 hänet
 häntä
+ihan
+ilmeisesti
 itse
+itsensä
+itseään
 ja
+jo
 johon
 joiden
 joihin
@@ -46,18 +212,90 @@ joissa
 joista
 joita
 joka
+jokainen
+jokin
+joko
 joksi
+joku
 jolla
 jolle
+jolloin
 jolta
+jompikumpi
 jona
 jonka
+jonkin
+jonne
+joo
+jopa
 jos
+joskus
 jossa
 josta
 jota
+jotain
+joten
+jotenkin
+jotenkuten
 jotka
+jotta
+jouduimme
+jouduin
+jouduit
+jouduitte
+joudumme
+joudun
+joudutte
+joukkoon
+joukossa
+joukosta
+joutua
+joutui
+joutuivat
+joutumaan
+joutuu
+joutuvat
+juuri
+jälkeen
+jälleen
+jää
+kahdeksan
+kahdeksannen
+kahdella
+kahdelle
+kahdelta
+kahden
+kahdessa
+kahdesta
+kahta
+kahteen
+kai
+kaiken
+kaikille
+kaikilta
+kaikkea
+kaikki
+kaikkia
+kaikkiaan
+kaikkialla
+kaikkialle
+kaikkialta
+kaikkien
+kaikkin
+kaksi
+kannalta
+kannattaa
 kanssa
+kanssaan
+kanssamme
+kanssani
+kanssanne
+kanssasi
+kauan
+kauemmas
+kaukana
+kautta
+kehen
 keiden
 keihin
 keiksi
@@ -67,6 +305,7 @@ keiltä
 keinä
 keissä
 keistä
+keitten
 keitä
 keneen
 keneksi
@@ -78,13 +317,68 @@ kenenä
 kenessä
 kenestä
 kenet
-ketkä
+kenettä
+kennessästä
+kenties
+kerran
+kerta
+kertaa
+keskellä
+kesken
 ketkä
 ketä
+kiitos
+kohti
+koko
+kokonaan
+kolmas
+kolme
+kolmen
+kolmesti
 koska
+koskaan
+kovin
 kuin
+kuinka
+kuinkan
+kuitenkaan
+kuitenkin
 kuka
+kukaan
+kukin
+kukka
+kumpainen
+kumpainenkaan
+kumpi
+kumpikaan
+kumpikin
 kun
+kuten
+kuuden
+kuusi
+kuutta
+kylliksi
+kyllä
+kymmenen
+kyse
+liian
+liki
+lisäksi
+lisää
+lla
+luo
+luona
+lähekkäin
+lähelle
+lähellä
+läheltä
+lähemmäs
+lähes
+lähinnä
+lähtien
+läpi
+mahdollisimman
+mahdollista
 me
 meidän
 meidät
@@ -95,14 +389,36 @@ meiltä
 meissä
 meistä
 meitä
+melkein
+melko
+menee
+meneet
+menemme
+menen
+menet
+menette
+menevät
+meni
+menimme
+menin
+menit
+menivät
+mennessä
+mennyt
+menossa
 mihin
+mikin
 miksi
 mikä
+mikäli
+mikään
 mille
+milloin
+milloinkan
 millä
 miltä
 minkä
-minkä
+minne
 minua
 minulla
 minulle
@@ -113,14 +429,48 @@ minusta
 minut
 minuun
 minä
-minä
 missä
 mistä
+miten
 mitkä
 mitä
+mitään
+moi
+molemmat
+mones
+monesti
+monet
+moni
+monta
+muassa
+muiden
+muita
+muka
 mukaan
+mukaansa
+mukana
 mutta
+muu
+muualla
+muualle
+muualta
+muuanne
+muulloin
+muun
+muut
+muuta
+muutama
+muutaman
+muuten
+myöhemmin
+myös
+myöskin
+myöskään
+myötä
 ne
+neljä
+neljän
+neljää
 niiden
 niihin
 niiksi
@@ -128,7 +478,6 @@ niille
 niillä
 niiltä
 niin
-niin
 niinä
 niissä
 niistä
@@ -144,6 +493,7 @@ noina
 noissa
 noista
 noita
+nro
 nuo
 nyt
 näiden
@@ -152,16 +502,28 @@ näiksi
 näille
 näillä
 näiltä
+näin
 näinä
 näissä
+näissähin
+näissälle
+näissältä
+näissästä
 näistä
 näitä
 nämä
+ohi
+oikea
+oikealla
+oikein
 ole
 olemme
 olen
 olet
 olette
+oleva
+olevan
+olevat
 oli
 olimme
 olin
@@ -176,21 +538,84 @@ olitte
 olivat
 olla
 olleet
+olli
 ollut
+oma
+omaa
+omaan
+omaksi
+omalle
+omalta
+oman
+omassa
+omat
+omien
+omiin
+omiksi
+omille
+omilta
+omissa
+omista
 on
+onkin
+onko
 ovat
+paikoittain
+paitsi
+pakosti
+paljon
+paremmin
+parempi
+parhaillaan
+parhaiten
+perusteella
+peräti
+pian
+pieneen
+pieneksi
+pienelle
+pienellä
+pieneltä
+pienempi
+pienestä
+pienin
 poikki
+puolesta
+puolestaan
+päälle
+saakka
+sadam
+sama
+samaa
+samaan
+samalla
+samallalta
+samallassa
+samallasta
+saman
+samat
+samoin
+sata
+satojen
 se
+seitsemän
 sekä
 sen
+seuraavat
+siellä
+sieltä
 siihen
 siinä
+siis
 siitä
+sijaan
 siksi
 sille
+silloin
 sillä
-sillä
+silti
 siltä
+sinne
 sinua
 sinulla
 sinulle
@@ -201,10 +626,32 @@ sinusta
 sinut
 sinuun
 sinä
-sinä
+siten
+sitten
 sitä
+ssa
+sta
+suoraan
+suuntaan
+suuret
+suuri
+suuria
+suurin
+suurten
+taa
+taas
+taemmas
+tahansa
 tai
+takaa
+takaisin
+takana
+takia
 tallä
+tapauksessa
+tarpeeksi
+tavalla
+tavoitteena
 te
 teidän
 teidät
@@ -215,6 +662,19 @@ teiltä
 teissä
 teistä
 teitä
+tietysti
+todella
+toinen
+tois
+toisaalla
+toisaalle
+toisaalta
+toiseen
+toiseksi
+toisella
+toiselle
+toiselta
+toisemme
 tuo
 tuohon
 tuoksi
@@ -239,4 +699,4 @@ tätä
 vaan
 vai
 vaikka
-yli
+yli
\ No newline at end of file
diff --git a/apps/common/src/python/mediawords/languages/fi/fi_stop_words_old.txt b/apps/common/src/python/mediawords/languages/fi/fi_stop_words_old.txt
new file mode 100644
index 0000000000..aa2cb4cdf7
--- /dev/null
+++ b/apps/common/src/python/mediawords/languages/fi/fi_stop_words_old.txt
@@ -0,0 +1,242 @@
+#
+# This is a stop word list for the Finnish language.
+#
+# Sources:
+#     http://search.cpan.org/~creamyg/Lingua-StopWords-0.09/
+#
+
+ei
+eivät
+emme
+en
+et
+ette
+että
+he
+heidän
+heidät
+heihin
+heille
+heillä
+heiltä
+heissä
+heistä
+heitä
+hän
+häneen
+hänelle
+hänellä
+häneltä
+hänen
+hänessä
+hänestä
+hänet
+häntä
+itse
+ja
+johon
+joiden
+joihin
+joiksi
+joilla
+joille
+joilta
+joina
+joissa
+joista
+joita
+joka
+joksi
+jolla
+jolle
+jolta
+jona
+jonka
+jos
+jossa
+josta
+jota
+jotka
+kanssa
+keiden
+keihin
+keiksi
+keille
+keillä
+keiltä
+keinä
+keissä
+keistä
+keitä
+keneen
+keneksi
+kenelle
+kenellä
+keneltä
+kenen
+kenenä
+kenessä
+kenestä
+kenet
+ketkä
+ketkä
+ketä
+koska
+kuin
+kuka
+kun
+me
+meidän
+meidät
+meihin
+meille
+meillä
+meiltä
+meissä
+meistä
+meitä
+mihin
+miksi
+mikä
+mille
+millä
+miltä
+minkä
+minkä
+minua
+minulla
+minulle
+minulta
+minun
+minussa
+minusta
+minut
+minuun
+minä
+minä
+missä
+mistä
+mitkä
+mitä
+mukaan
+mutta
+ne
+niiden
+niihin
+niiksi
+niille
+niillä
+niiltä
+niin
+niin
+niinä
+niissä
+niistä
+niitä
+noiden
+noihin
+noiksi
+noilla
+noille
+noilta
+noin
+noina
+noissa
+noista
+noita
+nuo
+nyt
+näiden
+näihin
+näiksi
+näille
+näillä
+näiltä
+näinä
+näissä
+näistä
+näitä
+nämä
+ole
+olemme
+olen
+olet
+olette
+oli
+olimme
+olin
+olisi
+olisimme
+olisin
+olisit
+olisitte
+olisivat
+olit
+olitte
+olivat
+olla
+olleet
+ollut
+on
+ovat
+poikki
+se
+sekä
+sen
+siihen
+siinä
+siitä
+siksi
+sille
+sillä
+sillä
+siltä
+sinua
+sinulla
+sinulle
+sinulta
+sinun
+sinussa
+sinusta
+sinut
+sinuun
+sinä
+sinä
+sitä
+tai
+tallä
+te
+teidän
+teidät
+teihin
+teille
+teillä
+teiltä
+teissä
+teistä
+teitä
+tuo
+tuohon
+tuoksi
+tuolla
+tuolle
+tuolta
+tuon
+tuona
+tuossa
+tuosta
+tuotä
+tähän
+täksi
+tälle
+tältä
+tämä
+tämän
+tänä
+tässä
+tästä
+tätä
+vaan
+vai
+vaikka
+yli
diff --git a/apps/common/src/python/mediawords/languages/fr/fr_stop_words.txt b/apps/common/src/python/mediawords/languages/fr/fr_stop_words.txt
index 2f7ed427ca..291bd4a78d 100644
--- a/apps/common/src/python/mediawords/languages/fr/fr_stop_words.txt
+++ b/apps/common/src/python/mediawords/languages/fr/fr_stop_words.txt
@@ -1,18 +1,44 @@
-#
-# This is a stop word list for the French language.
-#
 # Sources:
-#     http://search.cpan.org/~creamyg/Lingua-StopWords-0.09/
 #
+#     http://search.cpan.org/~creamyg/Lingua-StopWords-0.09/
+#     https://github.com/stopwords-iso/stopwords-fr/blob/master/stopwords-fr.txt
+# (Lightly edited to remove words in the original lists that are actually meaningful)
 
+a
+à
+â
 a-t-on
+abord
+absolument
+afin
+ah
 ai
-aie
 aient
 aies
+ailleurs
+ainsi
 ait
+allaient
+allo
+allô
+allons
+alors
+anterieur
+anterieure
+anterieures
+apres
+après
 as
+assez
+attendu
 au
+aucun
+aucune
+aucuns
+aujourd
+aujourd'hui
+aupres
+auquel
 aura
 aurai
 auraient
@@ -24,41 +50,190 @@ auriez
 aurions
 aurons
 auront
+aussi
+autant
+autre
+autrement
+autres
+autrui
 aux
+auxquelles
+auxquels
 avaient
 avais
 avait
+avant
 avec
 avez
 aviez
-avions
+avoir
 avons
 ayant
 ayante
 ayantes
-ayants
 ayez
 ayons
+b
+bah
+bas
+basee
+beaucoup
+bien
+bigre
+bon
+boum
+brrr
 c
+ça
+car
 ce
+ceci
+cela
+celà
 celle
+celle-ci
+celle-là
+celles
+celles-ci
+celles-là
+celui
+celui-ci
+celui-là
+cent
+cependant
+certain
+certaine
+certaines
+certains
+certes
 ces
+cet
+cette
+ceux
+ceux-ci
+ceux-là
+chacun
+chacune
+chaque
+chère
+chères
+chers
+chez
+ci
+cinq
+cinquantaine
+cinquante
+cinquantième
+cinquième
+clac
+clic
+combien
+comme
+comment
+comparable
+comparables
+compris
+concernant
+contre
 d
 d'une
+da
 dans
 de
+debout
+début
+dedans
+dehors
+deja
+delà
+depuis
+dernier
+derniere
+derriere
+derrière
 des
+dès
+desormais
+désormais
+desquelles
+desquels
+dessous
+dessus
+deux
+deuxième
+deuxièmement
+devant
+devers
+devra
+devrait
+different
+différent
+différente
+differentes
+différentes
+differents
+différents
+dire
+dit
+dite
+dits
+dix
+dix-huit
+dix-neuf
+dix-sept
+dixième
+doit
+doivent
 donc
 dont
+dos
+douze
+douzième
+dring
+droite
 du
+duquel
+durant
+e
+effet
+egalement
+egales
+eh
 elle
+elle-même
+elles
+elles-mêmes
 en
+encore
+enfin
+entre
+envers
+environ
 es
+ès
+essai
 est
 et
+étaient
+étais
+était
+etant
+étant
+étante
+étantes
+étants
+état
+etc
+êtes
+étiez
+étions
+etre
+être
 eu
 eue
 eues
+euh
+eûmes
 eurent
 eus
 eusse
@@ -67,10 +242,27 @@ eusses
 eussiez
 eussions
 eut
-eux
-eûmes
 eût
 eûtes
+eux
+eux-mêmes
+exactement
+excepté
+extenso
+exterieur
+f
+façon
+fais
+faisaient
+faisant
+fait
+faites
+feront
+fi
+flac
+fois
+font
+fûmes
 furent
 fus
 fusse
@@ -79,56 +271,270 @@ fusses
 fussiez
 fussions
 fut
-fûmes
 fût
 fûtes
+g
+gens
+h
+ha
+haut
+hé
+hein
+hélas
+hem
+hep
+hi
+ho
+holà
+hop
+hormis
+hors
+hou
+houp
+hue
+hui
+huit
+huitième
+hum
+i
+ici
 il
 ils
+importe
 j
 je
+jusqu
+jusque
+juste
+k
 l
 l'
 la
+là
+laisser
+laquelle
+las
 le
+lequel
 les
+lès
+lesquelles
+lesquels
 leur
 leurs
+longtemps
+lors
+lorsque
 lui
+lui-meme
+lui-même
 m
 ma
+maint
+maintenant
 mais
+malgre
+malgré
 me
+meme
+même
+memes
+mêmes
+merci
 mes
+mien
+mienne
+miennes
+miens
+mille
+mince
+mine
+minimale
 moi
+moi-meme
+moi-même
+moindres
+moins
 mon
-même
+mot
+moyennant
+multiple
+multiples
 n
 n'a
 n'est
+na
+naturelles
 ne
+neanmoins
+néanmoins
+necessaire
+necessairement
+neuf
+neuvième
 ni
+nombreuses
+nommés
+non
 nos
+notamment
 notre
+nôtre
+nôtres
 nous
+nous-mêmes
+nouveau
+nouveaux
+nul
+o
+ô
+oh
+ohé
+olé
+ollé
 on
 ont
+onze
+onzième
+ore
 ou
 où
+ouf
+ouias
+oust
+ouste
+ouvert
+ouverte
+ouverts
+o|
+p
+paf
+pan
 par
+parce
+parfois
+parle
+parlent
+parler
+parmi
+parole
+parseme
+partant
+particulier
+particulière
+particulièrement
 pas
+passé
+pendant
+pense
+permet
+personne
+personnes
+peu
+peut
+peuvent
+peux
+pff
+pfft
+pfut
+pièce
+pif
+pire
+plein
+plupart
+plus
+plusieurs
+plutôt
+possessif
+possessifs
+possible
+possibles
+pouah
 pour
+pourquoi
+pourrais
+pourrait
+pouvait
+prealable
+precisement
+premier
+première
+premièrement
+pres
+près
+probable
+probante
+procedant
+proche
+psitt
+pu
+puis
+puisque
+pur
+pure
+q
 qu
 qu'elle
 qu'il
 qu'on
 qu'une
 quand
+quant
+quant-à-soi
+quanta
+quarante
+quatorze
+quatre
+quatre-vingt
+quatrième
+quatrièmement
 que
+quel
+quelconque
+quelle
+quelles
+quelqu'un
+quelque
+quelques
+quels
 qui
+quiconque
+quinze
+quoi
+quoique
+r
+rare
+rarement
+relative
+relativement
+rend
+rendre
+restant
+reste
+restent
+retour
+revoici
+revoilà
+rien
 s
 s'est
 sa
+sacrebleu
+sait
+sans
+sapristi
+sauf
 se
+sein
+seize
+selon
+semblable
+semblaient
+semble
+semblent
+sent
+sept
+septième
 sera
 serai
 seraient
@@ -141,45 +547,125 @@ serions
 serons
 seront
 ses
+seul
+seule
+seulement
 si
+sien
+sienne
+siennes
+siens
+sinon
+six
+sixième
+soi
+soi-même
 soient
 sois
 soit
+soixante
 sommes
 son
 sont
+sous
+souvent
 soyez
 soyons
+stop
+strictement
+suffit
 suis
+suit
+suivant
+suivante
+suivantes
+suivants
+suivre
+sujet
 sur
+surtout
 t
 ta
+tac
+tandis
+tant
+tardive
 te
+té
+telle
+tellement
+telles
+tels
+tenant
+tend
+tenir
+tente
 tes
+tic
+tien
+tienne
+tiennes
+tiens
+toc
 toi
+toi-même
 ton
+touchant
+toujours
+tous
+tout
+toute
+toutefois
+toutes
+treize
+trente
+tres
+très
+trois
+troisième
+troisièmement
+trop
+tsoin
+tsouin
 tu
+u
 un
 une
+unes
+uniformement
+unique
+uniques
+uns
+v
 va
 vais
+valeur
+vas
+vé
+vers
+via
+vif
+vifs
+vingt
+vivat
+vive
+vives
+vlan
+voici
+voie
+voient
+voilà
+voire
+vont
 vos
 votre
+vôtre
+vôtres
 vous
+vous-mêmes
+vu
+w
+x
 y
-à
-étaient
-étais
-était
-étant
-étante
-étantes
-étants
-étiez
-étions
-été
-étée
-étées
-étés
-êtes
-être
+z
+zut
\ No newline at end of file
diff --git a/apps/common/src/python/mediawords/languages/fr/fr_stop_words_old.txt b/apps/common/src/python/mediawords/languages/fr/fr_stop_words_old.txt
new file mode 100644
index 0000000000..2f7ed427ca
--- /dev/null
+++ b/apps/common/src/python/mediawords/languages/fr/fr_stop_words_old.txt
@@ -0,0 +1,185 @@
+#
+# This is a stop word list for the French language.
+#
+# Sources:
+#     http://search.cpan.org/~creamyg/Lingua-StopWords-0.09/
+#
+
+a-t-on
+ai
+aie
+aient
+aies
+ait
+as
+au
+aura
+aurai
+auraient
+aurais
+aurait
+auras
+aurez
+auriez
+aurions
+aurons
+auront
+aux
+avaient
+avais
+avait
+avec
+avez
+aviez
+avions
+avons
+ayant
+ayante
+ayantes
+ayants
+ayez
+ayons
+c
+ce
+celle
+ces
+d
+d'une
+dans
+de
+des
+donc
+dont
+du
+elle
+en
+es
+est
+et
+eu
+eue
+eues
+eurent
+eus
+eusse
+eussent
+eusses
+eussiez
+eussions
+eut
+eux
+eûmes
+eût
+eûtes
+furent
+fus
+fusse
+fussent
+fusses
+fussiez
+fussions
+fut
+fûmes
+fût
+fûtes
+il
+ils
+j
+je
+l
+l'
+la
+le
+les
+leur
+leurs
+lui
+m
+ma
+mais
+me
+mes
+moi
+mon
+même
+n
+n'a
+n'est
+ne
+ni
+nos
+notre
+nous
+on
+ont
+ou
+où
+par
+pas
+pour
+qu
+qu'elle
+qu'il
+qu'on
+qu'une
+quand
+que
+qui
+s
+s'est
+sa
+se
+sera
+serai
+seraient
+serais
+serait
+seras
+serez
+seriez
+serions
+serons
+seront
+ses
+si
+soient
+sois
+soit
+sommes
+son
+sont
+soyez
+soyons
+suis
+sur
+t
+ta
+te
+tes
+toi
+ton
+tu
+un
+une
+va
+vais
+vos
+votre
+vous
+y
+à
+étaient
+étais
+était
+étant
+étante
+étantes
+étants
+étiez
+étions
+été
+étée
+étées
+étés
+êtes
+être
diff --git a/apps/common/src/python/mediawords/languages/ha/ha_stop_words.txt b/apps/common/src/python/mediawords/languages/ha/ha_stop_words.txt
index 07c7723d36..cc7896d042 100644
--- a/apps/common/src/python/mediawords/languages/ha/ha_stop_words.txt
+++ b/apps/common/src/python/mediawords/languages/ha/ha_stop_words.txt
@@ -1,9 +1,8 @@
 #
 # This is a stop word list for the Hausa language.
-#
 # Sources:
 #     https://github.com/stopwords-iso/stopwords-ha/blob/master/raw/gh-stopwords-json-ha.txt
-#
+# (Lightly edited to remove words in the original lists that are actually meaningful)
 
 a
 amma
@@ -43,4 +42,4 @@ ya
 yake
 yana
 yi
-za
+za
\ No newline at end of file
diff --git a/apps/common/src/python/mediawords/languages/ha/ha_stop_words_old.txt b/apps/common/src/python/mediawords/languages/ha/ha_stop_words_old.txt
new file mode 100644
index 0000000000..07c7723d36
--- /dev/null
+++ b/apps/common/src/python/mediawords/languages/ha/ha_stop_words_old.txt
@@ -0,0 +1,46 @@
+#
+# This is a stop word list for the Hausa language.
+#
+# Sources:
+#     https://github.com/stopwords-iso/stopwords-ha/blob/master/raw/gh-stopwords-json-ha.txt
+#
+
+a
+amma
+ba
+ban
+ce
+cikin
+da
+don
+ga
+in
+ina
+ita
+ji
+ka
+ko
+kuma
+lokacin
+ma
+mai
+na
+ne
+ni
+sai
+shi
+su
+suka
+sun
+ta
+tafi
+take
+tana
+wani
+wannan
+wata
+ya
+yake
+yana
+yi
+za
diff --git a/apps/common/src/python/mediawords/languages/hi/__init__.py b/apps/common/src/python/mediawords/languages/hi/__init__.py
index 4a98351b3b..12f6e60b8f 100644
--- a/apps/common/src/python/mediawords/languages/hi/__init__.py
+++ b/apps/common/src/python/mediawords/languages/hi/__init__.py
@@ -21,6 +21,9 @@ class HindiLanguage(StopWordsFromFileMixIn):
         # Stop words map
         '__stop_words_map',
 
+        # FIXME remove once stopword comparison is over
+        '__stop_words_old_map',
+
         # Hunspell instance
         '__hindi_hunspell',
 
diff --git a/apps/common/src/python/mediawords/languages/hi/hi_stop_words.txt b/apps/common/src/python/mediawords/languages/hi/hi_stop_words.txt
index 27440bfb15..0682f8985c 100644
--- a/apps/common/src/python/mediawords/languages/hi/hi_stop_words.txt
+++ b/apps/common/src/python/mediawords/languages/hi/hi_stop_words.txt
@@ -1,28 +1,34 @@
-#
-# This is a stop word list for the Hindi language.
-#
 # Sources:
-#     http://www.ranks.nl/stopwords/hindi
+#
 #     http://members.unine.ch/jacques.savoy/clef/hindiST.txt
-#     https://sites.google.com/site/kevinbouge/stopwords-lists
 #     http://resgtholpadi.blogspot.com/2012/07/hindi-stop-words-list.html
-#
+#     http://www.ranks.nl/stopwords/hindi
+#     https://github.com/stopwords-iso/stopwords-hi/blob/master/stopwords-hi.txt
+#     https://sites.google.com/site/kevinbouge/stopwords-lists
+# (Lightly edited to remove words in the original lists that are actually meaningful)
 
 अंदर
 अत
 अथवा
+अदि
 अन्य
+अप
 अपना
+अपनि
 अपनी
 अपने
 अब
+अभि
 अभी
 आज
 आदि
 आप
+इंहिं
+इंहें
+इंहों
+इतयादि
 इत्यादि
 इन
-इन
 इनका
 इनके
 इन्हीं
@@ -30,38 +36,46 @@
 इन्हों
 इस
 इसका
+इसकि
 इसकी
 इसके
 इसमें
+इसि
 इसी
 इसे
+उंहिं
+उंहें
+उंहों
 उच्च
 उत्तर
 उन
 उनका
+उनकि
 उनकी
 उनके
 उनको
 उन्हीं
 उन्हें
-उन्हें
 उन्हों
 उस
 उसकी
 उसके
+उसि
 उसी
 उसे
 ऊपर
 एक
 एवं
 एस
+एसे
 ऐसा
 ऐसे
+ओर
 और
+कइ
 कई
 कभी
 कम
-कर
 करता
 करते
 करना
@@ -71,15 +85,16 @@
 कहते
 कहा
 का
+काफि
 काफ़ी
 कि
+किंहें
+किंहों
 किए
 कितना
 किन्हें
 किन्हों
 किया
-किर
-किस
 किसी
 किसे
 की
@@ -87,7 +102,10 @@
 कुल
 के
 को
+कोइ
 कोई
+कोन
+कोनसा
 कौन
 कौनसा
 गई
@@ -95,15 +113,18 @@
 गया
 गयी
 गये
-घर
 जब
 जहाँ
+जहां
 जा
 जाता
 जाती
 जाते
 जाने
+जिंहें
+जिंहों
 जितना
+जिधर
 जिन
 जिन्हें
 जिन्हों
@@ -112,6 +133,8 @@
 जिससे
 जिसे
 जीधर
+जेसा
+जेसे
 जैसा
 जैसे
 जो
@@ -119,6 +142,8 @@
 तथा
 तब
 तरह
+तिंहें
+तिंहों
 तिन
 तिन्हें
 तिन्हों
@@ -127,26 +152,31 @@
 तुम
 तो
 था
+थि
 थी
 थे
 दबारा
+दवारा
 दिया
 दुसरा
+दुसरे
 दूर
 दूसरे
 दो
 दोनों
 द्वारा
 न
+नहिं
 नहीं
 ना
+निचे
 निहायत
 नीचे
 ने
 पर
-पर
 परंतु
 पहले
+पुरा
 पूरा
 पूरे
 पे
@@ -154,14 +184,17 @@
 फिर
 बड़ा
 बड़े
+बनि
 बनी
-बही
+बहि
 बहुत
 बाद
 बाला
 बाहर
 बिलकुल
 बीच
+भि
+भितर
 भी
 भीतर
 मगर
@@ -174,11 +207,13 @@
 यह
 यहाँ
 यहां
+यहि
 यही
 या
 यिह
 ये
 रखें
+रवासा
 रहती
 रहा
 रहे
@@ -189,10 +224,12 @@
 लेकर
 लेकिन
 व
+वगेरह
 वर्ग
 वह
-वह
 वहाँ
+वहां
+वहिं
 वहीं
 वाले
 वुह
@@ -203,26 +240,32 @@
 सकती
 सकते
 सबसे
+सभि
 सभी
 समय
 साथ
 साबुत
-साभ
 सारा
 से
 सो
 स्थान
+हि
 ही
+हुअ
 हुआ
+हुइ
 हुई
 हुए
 हुये
+हे
+हें
 है
 हैं
 हो
 होता
+होति
 होती
 होते
 होना
 होने
-﻿के
+﻿के
\ No newline at end of file
diff --git a/apps/common/src/python/mediawords/languages/hi/hi_stop_words_old.txt b/apps/common/src/python/mediawords/languages/hi/hi_stop_words_old.txt
new file mode 100644
index 0000000000..27440bfb15
--- /dev/null
+++ b/apps/common/src/python/mediawords/languages/hi/hi_stop_words_old.txt
@@ -0,0 +1,228 @@
+#
+# This is a stop word list for the Hindi language.
+#
+# Sources:
+#     http://www.ranks.nl/stopwords/hindi
+#     http://members.unine.ch/jacques.savoy/clef/hindiST.txt
+#     https://sites.google.com/site/kevinbouge/stopwords-lists
+#     http://resgtholpadi.blogspot.com/2012/07/hindi-stop-words-list.html
+#
+
+अंदर
+अत
+अथवा
+अन्य
+अपना
+अपनी
+अपने
+अब
+अभी
+आज
+आदि
+आप
+इत्यादि
+इन
+इन
+इनका
+इनके
+इन्हीं
+इन्हें
+इन्हों
+इस
+इसका
+इसकी
+इसके
+इसमें
+इसी
+इसे
+उच्च
+उत्तर
+उन
+उनका
+उनकी
+उनके
+उनको
+उन्हीं
+उन्हें
+उन्हें
+उन्हों
+उस
+उसकी
+उसके
+उसी
+उसे
+ऊपर
+एक
+एवं
+एस
+ऐसा
+ऐसे
+और
+कई
+कभी
+कम
+कर
+करता
+करते
+करना
+करने
+करें
+कल
+कहते
+कहा
+का
+काफ़ी
+कि
+किए
+कितना
+किन्हें
+किन्हों
+किया
+किर
+किस
+किसी
+किसे
+की
+कुछ
+कुल
+के
+को
+कोई
+कौन
+कौनसा
+गई
+गए
+गया
+गयी
+गये
+घर
+जब
+जहाँ
+जा
+जाता
+जाती
+जाते
+जाने
+जितना
+जिन
+जिन्हें
+जिन्हों
+जिस
+जिसमें
+जिससे
+जिसे
+जीधर
+जैसा
+जैसे
+जो
+तक
+तथा
+तब
+तरह
+तिन
+तिन्हें
+तिन्हों
+तिस
+तिसे
+तुम
+तो
+था
+थी
+थे
+दबारा
+दिया
+दुसरा
+दूर
+दूसरे
+दो
+दोनों
+द्वारा
+न
+नहीं
+ना
+निहायत
+नीचे
+ने
+पर
+पर
+परंतु
+पहले
+पूरा
+पूरे
+पे
+प्रति
+फिर
+बड़ा
+बड़े
+बनी
+बही
+बहुत
+बाद
+बाला
+बाहर
+बिलकुल
+बीच
+भी
+भीतर
+मगर
+मध्य
+मानो
+मे
+में
+मै
+यदि
+यह
+यहाँ
+यहां
+यही
+या
+यिह
+ये
+रखें
+रहती
+रहा
+रहे
+ऱ्वासा
+लिए
+लिया
+लिये
+लेकर
+लेकिन
+व
+वर्ग
+वह
+वह
+वहाँ
+वहीं
+वाले
+वुह
+वे
+वग़ैरह
+संग
+सकता
+सकती
+सकते
+सबसे
+सभी
+समय
+साथ
+साबुत
+साभ
+सारा
+से
+सो
+स्थान
+ही
+हुआ
+हुई
+हुए
+हुये
+है
+हैं
+हो
+होता
+होती
+होते
+होना
+होने
+﻿के
diff --git a/apps/common/src/python/mediawords/languages/hu/hu_stop_words.txt b/apps/common/src/python/mediawords/languages/hu/hu_stop_words.txt
index 13c70d9d6f..da87882c7a 100644
--- a/apps/common/src/python/mediawords/languages/hu/hu_stop_words.txt
+++ b/apps/common/src/python/mediawords/languages/hu/hu_stop_words.txt
@@ -1,206 +1,792 @@
-#
-# This is a stop word list for the Hungarian language.
-#
 # Sources:
-#     http://search.cpan.org/~creamyg/Lingua-StopWords-0.09/
 #
+#     http://search.cpan.org/~creamyg/Lingua-StopWords-0.09/
+#     https://github.com/stopwords-iso/stopwords-hu/blob/master/stopwords-hu.txt
+# (Lightly edited to remove words in the original lists that are actually meaningful)
 
 a
+abba
 abban
+abból
+addig
 ahhoz
 ahogy
 ahol
 aki
 akik
 akkor
+akár
+alapján
 alatt
+alatta
+alattad
+alattam
+alattatok
+alattuk
+alattunk
+alá
+alád
+alájuk
+alám
+alánk
+alátok
+alól
+alóla
+alólad
+alólam
+alólatok
+alóluk
+alólunk
 amely
+amelybol
 amelyek
 amelyekben
 amelyeket
 amelyet
+amelyik
 amelynek
 ami
 amikor
 amit
 amolyan
+amott
 amíg
 annak
+annál
 arra
 arról
+attól
 az
+aznap
 azok
+azokat
+azokba
+azokban
+azokból
+azokhoz
+azokig
+azokkal
+azokká
+azoknak
+azoknál
+azokon
+azokra
+azokról
+azoktól
+azokért
 azon
 azonban
+azonnal
 azt
 aztán
 azután
 azzal
+azzá
 azért
+bal
+balra
+ban
 be
+belé
+beléd
+beléjük
+belém
+belénk
+belétek
 belül
+belőle
+belőled
+belőlem
+belőletek
+belőlük
+belőlünk
+ben
 benne
+benned
+bennem
+bennetek
+bennük
+bennünk
 bár
+bárcsak
+bármilyen
+búcsú
 cikk
 cikkek
 cikkeket
 csak
+csakhogy
+csupán
 de
+dehogy
 e
+ebbe
 ebben
+ebből
 eddig
 egy
+egyebek
+egyebet
+egyedül
+egyelőre
 egyes
+egyet
 egyetlen
 egyik
+egymás
 egyre
+egyszerre
 egyéb
+együtt
 egész
+egészen
 ehhez
 ekkor
 el
+eleinte
 ellen
+ellenes
+elleni
+ellenére
+elmondta
 elsõ
+első
+elsők
+elsősorban
+elsőt
+elé
+eléd
 elég
+eléjük
+elém
+elénk
+elétek
 elõ
 elõször
 elõtt
+elő
+előbb
+elől
+előle
+előled
+előlem
+előletek
+előlük
+előlünk
+először
+előtt
+előtte
+előtted
+előttem
+előttetek
+előttük
+előttünk
+előző
 emilyen
+engem
 ennek
+ennyi
+ennél
+enyém
 erre
+erről
+esetben
+ettől
 ez
 ezek
+ezekbe
+ezekben
+ezekből
+ezeken
+ezeket
+ezekhez
+ezekig
+ezekkel
+ezekké
+ezeknek
+ezeknél
+ezekre
+ezekről
+ezektől
+ezekért
 ezen
+ezentúl
+ezer
+ezret
 ezt
+ezután
 ezzel
+ezzé
 ezért
 fel
+fele
+felek
+felet
+felett
 felé
+fent
+fenti
+fél
+fölé
+gyakran
+ha
+halló
+hamar
 hanem
+harmadik
+harmadikat
+harminc
+hat
+hatodik
+hatodikat
+hatot
+hatvan
+helyett
+hetedik
+hetediket
+hetet
+hetven
+hirtelen
 hiszen
+hiába
 hogy
 hogyan
+hol
+holnap
+holnapot
+honnan
+hova
+hozzá
+hozzád
+hozzájuk
+hozzám
+hozzánk
+hozzátok
+hurrá
+huszadik
+hány
+hányszor
+hármat
+három
+hát
+hátha
+hátulsó
+hét
+húsz
+ide
+ide-оda
+idén
+igazán
 igen
 ill
 ill.
 illetve
 ilyen
 ilyenkor
+immár
+inkább
+is
 ismét
 ison
 itt
+jelenleg
 jobban
+jobbra
 jó
 jól
+jólesik
+jóval
+jövőre
 kell
+kellene
 kellett
+kelljen
 keressünk
 keresztül
+ketten
+kettő
+kettőt
+kevés
 ki
+kiben
+kiből
+kicsit
+kicsoda
+kihez
+kik
+kikbe
+kikben
+kikből
+kiken
+kiket
+kikhez
+kikkel
+kikké
+kiknek
+kiknél
+kikre
+kikről
+kiktől
+kikért
+kilenc
+kilencedik
+kilencediket
+kilencet
+kilencven
+kin
+kinek
+kinél
+kire
+kiről
+kit
+kitől
+kivel
+kivé
+kié
+kiért
+korábban
+képest
+kérem
+kérlek
+kész
+késő
+később
+későn
+két
+kétszer
 kívül
+körül
+köszönhetően
+köszönöm
+közben
+közel
+közepesen
+közepén
+közé
 között
 közül
+külön
+különben
+különböző
+különbözőbb
+különbözőek
+lassan
+le
 legalább
 legyen
 lehet
+lehetetlen
 lehetett
+lehetőleg
+lehetőség
 lenne
 lenni
+lennék
+lennének
 lesz
+leszek
+lesznek
+leszünk
 lett
+lettek
+lettem
+lettünk
+lévő
+ma
 maga
+magad
+magam
+magatokat
+magukat
+magunkat
 magát
+mai
 majd
-majd
+majdnem
+manapság
 meg
+megcsinál
+megcsinálnak
+megint
+megvan
 mellett
+mellette
+melletted
+mellettem
+mellettetek
+mellettük
+mellettünk
+mellé
+melléd
+melléjük
+mellém
+mellénk
+mellétek
+mellől
+mellőle
+mellőled
+mellőlem
+mellőletek
+mellőlük
+mellőlünk
 mely
 melyek
+melyik
+mennyi
 mert
 mi
+miatt
+miatta
+miattad
+miattam
+miattatok
+miattuk
+miattunk
+mibe
+miben
+miből
+mihez
+mik
+mikbe
+mikben
+mikből
+miken
+miket
+mikhez
+mikkel
+mikké
+miknek
+miknél
 mikor
+mikre
+mikről
+miktől
+mikért
 milyen
+min
+mind
+mindegyik
+mindegyiket
 minden
+mindenesetre
 mindenki
 mindent
+mindenütt
 mindig
+mindketten
+minek
 mint
 mintha
+minél
+mire
+miről
 mit
+mitől
 mivel
+mivé
 miért
+mondta
 most
+mostanáig
 már
 más
 másik
+másikat
+másnap
+második
+másodszor
+mások
+másokat
+mást
 még
+mégis
 míg
+mögé
+mögéd
+mögéjük
+mögém
+mögénk
+mögétek
+mögött
+mögötte
+mögötted
+mögöttem
+mögöttetek
+mögöttük
+mögöttünk
+mögüle
+mögüled
+mögülem
+mögületek
+mögülük
+mögülünk
+múltkor
+múlva
+na
 nagy
 nagyobb
 nagyon
+naponta
+napot
 ne
+negyedik
+negyediket
+negyven
+neked
 nekem
 neki
+nekik
+nektek
+nekünk
 nem
+nemcsak
+nemrég
 nincs
+nyolc
+nyolcadik
+nyolcadikat
+nyolcat
+nyolcvan
+nála
+nálad
+nálam
+nálatok
+náluk
+nálunk
+négy
+négyet
 néha
 néhány
 nélkül
+o
+oda
+ok
 olyan
+onnan
 ott
 pedig
 persze
+pár
+például
+rajta
+rajtad
+rajtam
+rajtatok
+rajtuk
+rajtunk
+rendben
+rosszul
 rá
+rád
+rájuk
+rám
+ránk
+rátok
+régen
+régóta
+részére
+róla
+rólad
+rólam
+rólatok
+róluk
+rólunk
+rögtön
 s
 saját
+se
 sem
 semmi
+semmilyen
+semmiség
+senki
+soha
 sok
+sokan
 sokat
 sokkal
+sokszor
+sokáig
+során
+stb.
 szemben
 szerint
+szerinte
+szerinted
+szerintem
+szerintetek
+szerintük
+szerintünk
+szervusz
 szinte
 számára
+száz
+századik
+százat
+szépen
+szét
+szíves
+szívesen
+szíveskedjék
+sőt
 talán
+tavaly
+te
+tegnap
+tegnapelőtt
 tehát
+tele
 teljes
+tessék
+ti
+tied
+titeket
+tizedik
+tizediket
+tizenegy
+tizenegyedik
+tizenhat
+tizenhárom
+tizenhét
+tizenkettedik
+tizenkettő
+tizenkilenc
+tizenkét
+tizennyolc
+tizennégy
+tizenöt
+tizet
 tovább
+további
 továbbá
+távol
+téged
+tényleg
+tíz
 több
-ugyanis
+többi
+többször
+túl
+tőle
+tőled
+tőlem
+tőletek
+tőlük
+tőlünk
+ugyanakkor
+ugyanez
+ugyani
+ugye
+urak
+uram
+urat
+utoljára
 utolsó
 után
 utána
 vagy
 vagyis
 vagyok
+vagytok
+vagyunk
+vajon
+valahol
 valaki
+valakit
+valamelyik
 valami
 valamint
 való
 van
 vannak
 vele
+veled
+velem
+veletek
+velük
+velünk
 vissza
+viszlát
 viszont
+viszontlátásra
 volna
+volnának
+volnék
 volt
 voltak
 voltam
 voltunk
+végre
+végén
+végül
 által
 általában
+ám
 át
+éljen
 én
 éppen
+érte
+érted
+értem
+értetek
+értük
+értünk
 és
+év
+évben
+éve
+évek
+éves
+évi
+évvel
 így
+óta
 õ
 õk
 õket
+ön
+önbe
+önben
+önből
+önhöz
+önnek
+önnel
+önnél
+önre
+önről
+önt
+öntől
+önért
+önök
+önökbe
+önökben
+önökből
+önöket
+önökhöz
+önökkel
+önöknek
+önöknél
+önökre
+önökről
+önöktől
+önökért
+önökön
+önön
 össze
+öt
+ötven
+ötödik
+ötödiket
+ötöt
 úgy
+úgyis
+úgynevezett
 új
 újabb
 újra
+úr
+ő
+ők
+őket
+őt
\ No newline at end of file
diff --git a/apps/common/src/python/mediawords/languages/hu/hu_stop_words_old.txt b/apps/common/src/python/mediawords/languages/hu/hu_stop_words_old.txt
new file mode 100644
index 0000000000..13c70d9d6f
--- /dev/null
+++ b/apps/common/src/python/mediawords/languages/hu/hu_stop_words_old.txt
@@ -0,0 +1,206 @@
+#
+# This is a stop word list for the Hungarian language.
+#
+# Sources:
+#     http://search.cpan.org/~creamyg/Lingua-StopWords-0.09/
+#
+
+a
+abban
+ahhoz
+ahogy
+ahol
+aki
+akik
+akkor
+alatt
+amely
+amelyek
+amelyekben
+amelyeket
+amelyet
+amelynek
+ami
+amikor
+amit
+amolyan
+amíg
+annak
+arra
+arról
+az
+azok
+azon
+azonban
+azt
+aztán
+azután
+azzal
+azért
+be
+belül
+benne
+bár
+cikk
+cikkek
+cikkeket
+csak
+de
+e
+ebben
+eddig
+egy
+egyes
+egyetlen
+egyik
+egyre
+egyéb
+egész
+ehhez
+ekkor
+el
+ellen
+elsõ
+elég
+elõ
+elõször
+elõtt
+emilyen
+ennek
+erre
+ez
+ezek
+ezen
+ezt
+ezzel
+ezért
+fel
+felé
+hanem
+hiszen
+hogy
+hogyan
+igen
+ill
+ill.
+illetve
+ilyen
+ilyenkor
+ismét
+ison
+itt
+jobban
+jó
+jól
+kell
+kellett
+keressünk
+keresztül
+ki
+kívül
+között
+közül
+legalább
+legyen
+lehet
+lehetett
+lenne
+lenni
+lesz
+lett
+maga
+magát
+majd
+majd
+meg
+mellett
+mely
+melyek
+mert
+mi
+mikor
+milyen
+minden
+mindenki
+mindent
+mindig
+mint
+mintha
+mit
+mivel
+miért
+most
+már
+más
+másik
+még
+míg
+nagy
+nagyobb
+nagyon
+ne
+nekem
+neki
+nem
+nincs
+néha
+néhány
+nélkül
+olyan
+ott
+pedig
+persze
+rá
+s
+saját
+sem
+semmi
+sok
+sokat
+sokkal
+szemben
+szerint
+szinte
+számára
+talán
+tehát
+teljes
+tovább
+továbbá
+több
+ugyanis
+utolsó
+után
+utána
+vagy
+vagyis
+vagyok
+valaki
+valami
+valamint
+való
+van
+vannak
+vele
+vissza
+viszont
+volna
+volt
+voltak
+voltam
+voltunk
+által
+általában
+át
+én
+éppen
+és
+így
+õ
+õk
+õket
+össze
+úgy
+új
+újabb
+újra
diff --git a/apps/common/src/python/mediawords/languages/it/it_stop_words.txt b/apps/common/src/python/mediawords/languages/it/it_stop_words.txt
index 4448e81c70..85e66e3ba8 100644
--- a/apps/common/src/python/mediawords/languages/it/it_stop_words.txt
+++ b/apps/common/src/python/mediawords/languages/it/it_stop_words.txt
@@ -1,27 +1,55 @@
-#
-# This is a stop word list for the Italian language.
-#
 # Sources:
-#     http://search.cpan.org/~creamyg/Lingua-StopWords-0.09/
 #
+#     http://search.cpan.org/~creamyg/Lingua-StopWords-0.09/
+#     https://github.com/stopwords-iso/stopwords-it/blob/master/stopwords-it.txt
+# (Lightly edited to remove words in the original lists that are actually meaningful)
 
 a
+abbastanza
 abbia
 abbiamo
 abbiano
 abbiate
+accidenti
 ad
+adesso
+affinché
 agl
 agli
+ahime
+ahimè
 ai
 al
+alcuna
+alcuni
+alcuno
 all
 alla
 alle
 allo
+allora
+altre
+altri
+altrimenti
+altro
+altrove
+altrui
 anche
+ancora
+anni
+anno
+ansa
+anticipo
+assai
+attesa
+attraverso
+avanti
 avemmo
 avendo
+avente
+aver
+avere
+averlo
 avesse
 avessero
 avessi
@@ -35,6 +63,7 @@ avevano
 avevate
 avevi
 avevo
+avrà
 avrai
 avranno
 avrebbe
@@ -45,22 +74,67 @@ avremo
 avreste
 avresti
 avrete
-avrà
 avrò
 avuta
 avute
 avuti
 avuto
+basta
+ben
+bene
+benissimo
+brava
+bravo
+buono
 c
+caso
+cento
+certa
+certe
+certi
+certo
 che
 chi
+chicchessia
+chiunque
 ci
+ciascuna
+ciascuno
+cima
+cinque
+cio
+ciò
+cioe
+cioè
+circa
+citta
+città
+co
+codesta
+codesti
+codesto
 coi
 col
+colei
+coll
+coloro
+colui
 come
+cominci
+comprare
+comunque
 con
+concernente
+conclusione
+consecutivi
+consecutivo
 contro
+cos
+cosa
+cosi
+così
 cui
+d
 da
 dagl
 dagli
@@ -70,6 +144,8 @@ dall
 dalla
 dalle
 dallo
+dappertutto
+davanti
 degl
 degli
 dei
@@ -78,25 +154,58 @@ dell
 della
 delle
 dello
+dentro
+detto
+deve
+devo
 di
+dice
+dietro
+dire
+dirimpetto
+diventa
+diventare
+diventato
+dopo
+doppio
 dov
 dove
+dovra
+dovrà
+dovunque
+due
+dunque
+durante
 e
+è
 ebbe
 ebbero
 ebbi
+ecc
+ecco
 ed
+effettivamente
+egli
+ella
+entrambi
+eppure
 era
 erano
 eravamo
 eravate
 eri
 ero
+esempio
+esse
 essendo
+esser
+essere
+essi
+ex
+fa
 faccia
 facciamo
 facciano
-facciate
 faccio
 facemmo
 facendo
@@ -114,8 +223,10 @@ facevi
 facevo
 fai
 fanno
+farà
 farai
 faranno
+fare
 farebbe
 farebbero
 farei
@@ -124,44 +235,112 @@ faremo
 fareste
 faresti
 farete
-farà
-farò
+fatto
+favore
 fece
 fecero
 feci
+fin
+finalmente
+finche
+fine
+fino
+forse
+forza
 fosse
 fossero
 fossi
 fossimo
 foste
 fosti
+fra
+frattempo
 fu
 fui
 fummo
+fuori
 furono
+futuro
+generale
+gente
+gia
+già
+giorni
+giorno
+giu
 gli
+gliela
+gliele
+glieli
+glielo
+gliene
+grande
+grazie
+gruppo
 ha
+haha
 hai
 hanno
 ho
 i
+ie
+ieri
 il
 in
+inc
+indietro
+infatti
+inoltre
+insieme
+intanto
+intorno
+invece
 io
 l
 la
+là
+lasciato
+lato
 le
 lei
 li
 lo
+lontano
 loro
 lui
+lungo
+luogo
 ma
+macche
+magari
+mai
+male
+malgrado
+malissimo
+me
+medesimo
+mediante
+meglio
+meno
+mentre
+mesi
+mezzo
 mi
 mia
 mie
 miei
+mila
+miliardi
+milioni
+minimi
 mio
+modo
+molta
+molti
+moltissimo
+molto
+momento
+mondo
 ne
 negl
 negli
@@ -171,29 +350,127 @@ nell
 nella
 nelle
 nello
+nemmeno
+neppure
+nessun
+nessuna
+nessuno
+niente
+no
 noi
+nome
 non
+nondimeno
+nonostante
+nonsia
 nostra
 nostre
 nostri
 nostro
+novanta
+nove
+nulla
+nuovi
+nuovo
 o
+od
+oggi
+ogni
+ognuna
+ognuno
+oltre
+oppure
+ora
+ore
+osi
+ossia
+ottanta
+otto
+paese
+parecchi
+parecchie
+parecchio
+parte
+partendo
+peccato
+peggio
 per
+perche
 perché
+perchè
+percio
+perciò
+perfino
+pero
+però
+persino
+persone
+piedi
+pieno
+piglia
+piu
 più
+piuttosto
+po
+pochissimo
+poco
+poi
+poiche
+possa
+possedere
+posteriore
+posto
+potrebbe
+preferibilmente
+presa
+press
+prima
+primo
+principalmente
+probabilmente
+promesso
+proprio
+puo
+può
+pure
+purtroppo
+qua
+qualche
+qualcosa
+qualcuna
+qualcuno
 quale
+quali
+qualunque
+quando
 quanta
 quante
 quanti
 quanto
+quantunque
+quarto
+quasi
+quattro
+quel
 quella
 quelle
 quelli
 quello
+quest
 questa
 queste
 questi
 questo
+qui
+quindi
+quinto
+realmente
+recente
+recentemente
+riecco
+salvo
+sara
+sarà
 sarai
 saranno
 sarebbe
@@ -204,21 +481,41 @@ saremo
 sareste
 saresti
 sarete
-sarà
 sarò
+scorso
 se
+secondo
+seguente
+seguito
 sei
+sembra
+sembrare
+sembrato
+sembrava
+sembri
+sempre
+senza
+sette
 si
 sia
 siamo
 siano
 siate
 siete
+sig
+solito
+solo
+soltanto
 sono
+sopra
+soprattutto
+sotto
+spesso
 sta
 stai
 stando
 stanno
+starà
 starai
 staranno
 starebbe
@@ -229,8 +526,11 @@ staremo
 stareste
 staresti
 starete
-starà
 starò
+stata
+state
+stati
+stato
 stava
 stavamo
 stavano
@@ -238,10 +538,12 @@ stavate
 stavi
 stavo
 stemmo
+stessa
 stesse
 stessero
 stessi
 stessimo
+stesso
 steste
 stesti
 stette
@@ -254,6 +556,9 @@ stiate
 sto
 su
 sua
+subito
+successivamente
+successivo
 sue
 sugl
 sugli
@@ -265,22 +570,56 @@ sulle
 sullo
 suo
 suoi
+tale
+tali
+talvolta
+tanto
+te
+tempo
+terzo
+th
 ti
+titolo
 tra
+tranne
+tre
+trenta
+triplo
+troppo
+trovato
 tu
 tua
 tue
 tuo
 tuoi
+tutta
+tuttavia
+tutte
 tutti
 tutto
+uguali
+ulteriore
+ultimo
 un
 una
 uno
+uomo
+va
+vai
+vale
+vari
+varia
+varie
+vario
+verso
 vi
+vicino
+visto
+vita
 voi
+volta
+volte
 vostra
 vostre
 vostri
-vostro
-è
+vostro
\ No newline at end of file
diff --git a/apps/common/src/python/mediawords/languages/it/it_stop_words_old.txt b/apps/common/src/python/mediawords/languages/it/it_stop_words_old.txt
new file mode 100644
index 0000000000..4448e81c70
--- /dev/null
+++ b/apps/common/src/python/mediawords/languages/it/it_stop_words_old.txt
@@ -0,0 +1,286 @@
+#
+# This is a stop word list for the Italian language.
+#
+# Sources:
+#     http://search.cpan.org/~creamyg/Lingua-StopWords-0.09/
+#
+
+a
+abbia
+abbiamo
+abbiano
+abbiate
+ad
+agl
+agli
+ai
+al
+all
+alla
+alle
+allo
+anche
+avemmo
+avendo
+avesse
+avessero
+avessi
+avessimo
+aveste
+avesti
+avete
+aveva
+avevamo
+avevano
+avevate
+avevi
+avevo
+avrai
+avranno
+avrebbe
+avrebbero
+avrei
+avremmo
+avremo
+avreste
+avresti
+avrete
+avrà
+avrò
+avuta
+avute
+avuti
+avuto
+c
+che
+chi
+ci
+coi
+col
+come
+con
+contro
+cui
+da
+dagl
+dagli
+dai
+dal
+dall
+dalla
+dalle
+dallo
+degl
+degli
+dei
+del
+dell
+della
+delle
+dello
+di
+dov
+dove
+e
+ebbe
+ebbero
+ebbi
+ed
+era
+erano
+eravamo
+eravate
+eri
+ero
+essendo
+faccia
+facciamo
+facciano
+facciate
+faccio
+facemmo
+facendo
+facesse
+facessero
+facessi
+facessimo
+faceste
+facesti
+faceva
+facevamo
+facevano
+facevate
+facevi
+facevo
+fai
+fanno
+farai
+faranno
+farebbe
+farebbero
+farei
+faremmo
+faremo
+fareste
+faresti
+farete
+farà
+farò
+fece
+fecero
+feci
+fosse
+fossero
+fossi
+fossimo
+foste
+fosti
+fu
+fui
+fummo
+furono
+gli
+ha
+hai
+hanno
+ho
+i
+il
+in
+io
+l
+la
+le
+lei
+li
+lo
+loro
+lui
+ma
+mi
+mia
+mie
+miei
+mio
+ne
+negl
+negli
+nei
+nel
+nell
+nella
+nelle
+nello
+noi
+non
+nostra
+nostre
+nostri
+nostro
+o
+per
+perché
+più
+quale
+quanta
+quante
+quanti
+quanto
+quella
+quelle
+quelli
+quello
+questa
+queste
+questi
+questo
+sarai
+saranno
+sarebbe
+sarebbero
+sarei
+saremmo
+saremo
+sareste
+saresti
+sarete
+sarà
+sarò
+se
+sei
+si
+sia
+siamo
+siano
+siate
+siete
+sono
+sta
+stai
+stando
+stanno
+starai
+staranno
+starebbe
+starebbero
+starei
+staremmo
+staremo
+stareste
+staresti
+starete
+starà
+starò
+stava
+stavamo
+stavano
+stavate
+stavi
+stavo
+stemmo
+stesse
+stessero
+stessi
+stessimo
+steste
+stesti
+stette
+stettero
+stetti
+stia
+stiamo
+stiano
+stiate
+sto
+su
+sua
+sue
+sugl
+sugli
+sui
+sul
+sull
+sulla
+sulle
+sullo
+suo
+suoi
+ti
+tra
+tu
+tua
+tue
+tuo
+tuoi
+tutti
+tutto
+un
+una
+uno
+vi
+voi
+vostra
+vostre
+vostri
+vostro
+è
diff --git a/apps/common/src/python/mediawords/languages/ja/ja_stop_words.txt b/apps/common/src/python/mediawords/languages/ja/ja_stop_words.txt
index bfff6d32ff..6ec40c9b08 100755
--- a/apps/common/src/python/mediawords/languages/ja/ja_stop_words.txt
+++ b/apps/common/src/python/mediawords/languages/ja/ja_stop_words.txt
@@ -1,10 +1,9 @@
-#
 # This is a stop word list for the Japanese language.
-#
 # Sources:
+#
 #     https://github.com/stopwords/japanese-stopwords/blob/master/data/japanese-stopwords.txt
 #     Lucene's stopwords_ja.txt
-#
+# (Lightly edited to remove words in the original lists that are actually meaningful)
 
 $
 %
@@ -50,7 +49,6 @@ url
 いつ
 いま
 います
-いや
 いる
 いろいろ
 う
@@ -92,13 +90,11 @@ url
 これから
 これら
 ご
-ごっちゃ
 ごと
 ごろ
 さ
 さま
 さまざま
-さらい
 さらに
 される
 さん
@@ -252,7 +248,6 @@ url
 ひと
 ひとつ
 ふく
-ぶり
 へ
 への
 へん
@@ -360,7 +355,6 @@ url
 作
 作ら
 例
-係
 俺
 個
 億
@@ -400,38 +394,15 @@ url
 向け
 向こう
 和
-哀
-品
-員
-喜
-器
 四
 回
-国
-土
 在
-地
-報じ
-場
-場合
-境
-士
-夏
-外
 多く
 大
 女
 奴
 婦
 子
-字
-安
-官
-室
-家
-対
-小
-屋
 巡る
 左
 市
@@ -442,79 +413,49 @@ url
 店
 府
 度
-式
 形
 役
 彼
 彼女
 後
-怒
-思わ
-性
-情
-感
-感じ
 我々
 所
 手
 手段
-扱い
 数
 文
 新た
-新着
 方
 方法
 日
-春
 時
 時点
 時間
-更新
-書
 月
 期
-木
 未満
-末
-本
 本当
-村
-束
-枚
-校
-楽
 様
 様々
 次
 歳
-歴
 段
 毎
 毎日
-気
-水
 求め
-法
-派
-火
 点
-版
 特に
 玉
 用
 男
 町
-界
 略
 百
-的
 目
 相
 県
 確か
 示し
-社
 私
 私達
 秋
@@ -530,7 +471,6 @@ url
 結局
 続き
 線
-署
 考え
 者
 自体
@@ -544,34 +484,23 @@ url
 計
 話
 話し
-誌
 語っ
 読む
 誰
 課
-調べ
-論
 貴方
 貴方方
 輪
 近く
 述べ
-通
-速報
-連
 週
 道
 達
 違い
 選
-部
-都
-金
-銭
 開か
 間
 関
-関係
 関連
 際
 集
@@ -633,4 +562,4 @@ url
 ￫
 ￬
 ￭
-￮
+￮
\ No newline at end of file
diff --git a/apps/common/src/python/mediawords/languages/ja/ja_stop_words_old.txt b/apps/common/src/python/mediawords/languages/ja/ja_stop_words_old.txt
new file mode 100755
index 0000000000..bfff6d32ff
--- /dev/null
+++ b/apps/common/src/python/mediawords/languages/ja/ja_stop_words_old.txt
@@ -0,0 +1,636 @@
+#
+# This is a stop word list for the Japanese language.
+#
+# Sources:
+#     https://github.com/stopwords/japanese-stopwords/blob/master/data/japanese-stopwords.txt
+#     Lucene's stopwords_ja.txt
+#
+
+$
+%
+&
+@
+lwa
+posted
+ref
+url
+”
+…
+▽
+、
+。
+「
+」
+『
+』
+【
+】
+あそこ
+あたり
+あちら
+あっ
+あった
+あっち
+あと
+あな
+あなた
+あの
+あのかた
+あの人
+あり
+ありました
+あります
+ある
+あれ
+い
+いい
+いう
+いく
+いくつ
+いつ
+いま
+います
+いや
+いる
+いろいろ
+う
+うち
+え
+お
+おおまか
+おまえ
+およそ
+および
+おり
+おります
+おれ
+か
+かく
+かたち
+かつて
+かも
+かやの
+から
+が
+がい
+がら
+き
+きき
+きた
+くせ
+ください
+くれ
+くれる
+けど
+こうした
+ここ
+こちら
+こっち
+こと
+この
+これ
+これから
+これら
+ご
+ごっちゃ
+ごと
+ごろ
+さ
+さま
+さまざま
+さらい
+さらに
+される
+さん
+し
+しか
+しかし
+しかた
+した
+したい
+して
+しまう
+します
+しまっ
+しよう
+しれ
+しろ
+じゃ
+す
+すか
+すでに
+すね
+すべて
+する
+すれ
+ず
+ずつ
+せ
+せる
+ぜんぶ
+そう
+そこ
+そして
+そちら
+そっち
+そで
+その
+その他
+その後
+それ
+それから
+それぞれ
+それで
+それと
+それなり
+それに
+そんな
+た
+たい
+たくさん
+ただ
+ただし
+たち
+たび
+ため
+たら
+たり
+だ
+だけ
+だっ
+だめ
+だれ
+だろ
+ちゃ
+ちゃん
+った
+って
+つ
+ついに
+つつ
+て
+てる
+てん
+で
+でき
+できる
+でし
+でしょ
+です
+では
+でも
+と
+という
+といった
+とおり
+とか
+とき
+ところ
+として
+とって
+とともに
+となる
+とは
+とも
+と共に
+どう
+どういう
+どこ
+どこか
+どちら
+どっか
+どっち
+どの
+どれ
+な
+ない
+なお
+なか
+なかっ
+なかば
+ながら
+なく
+なけれ
+なし
+なぜ
+なっ
+なった
+など
+なに
+なのか
+なのに
+なら
+なり
+なる
+なん
+なんか
+に
+において
+における
+について
+にて
+にとって
+によって
+により
+による
+に対し
+に対して
+に対する
+に関して
+に関する
+ね
+の
+ので
+のに
+のみ
+は
+はじめ
+はず
+はるか
+ば
+ばかり
+ひと
+ひとつ
+ふく
+ぶり
+へ
+への
+へん
+べき
+べつ
+ぺん
+ほう
+ほか
+ほとんど
+ほど
+ま
+まさ
+まし
+ましょ
+ます
+ませ
+また
+または
+まで
+まとも
+まま
+み
+みたい
+みつ
+みなさん
+みんな
+も
+もし
+もしくは
+もっと
+もと
+もの
+ものの
+もん
+や
+やすい
+やっ
+やつ
+よ
+よう
+ような
+よく
+よそ
+より
+よる
+よると
+ら
+られ
+られる
+れ
+れる
+ろ
+わ
+わけ
+わたし
+を
+を通じて
+ん
+エラー
+カ所
+カ月
+キロ
+センチ
+ページ
+メートル
+レ
+ヵ所
+ヵ月
+ヶ所
+ヶ月
+・
+ー
+一
+一つ
+一方
+一覧
+七
+万
+三
+上
+上記
+下
+下記
+中
+九
+事
+二
+五
+人
+今
+今回
+他
+代
+以上
+以下
+以前
+以後
+以降
+会
+伸
+位
+体
+何
+何人
+作
+作ら
+例
+係
+俺
+個
+億
+元
+兆
+先
+全部
+八
+六
+内
+円
+再
+冬
+出
+分
+列
+別
+前
+前回
+力
+化
+匹
+区
+十
+千
+半ば
+及び
+受け
+口
+台
+右
+各
+同
+同じ
+名
+名前
+向け
+向こう
+和
+哀
+品
+員
+喜
+器
+四
+回
+国
+土
+在
+地
+報じ
+場
+場合
+境
+士
+夏
+外
+多く
+大
+女
+奴
+婦
+子
+字
+安
+官
+室
+家
+対
+小
+屋
+巡る
+左
+市
+席
+年
+年生
+幾つ
+店
+府
+度
+式
+形
+役
+彼
+彼女
+後
+怒
+思わ
+性
+情
+感
+感じ
+我々
+所
+手
+手段
+扱い
+数
+文
+新た
+新着
+方
+方法
+日
+春
+時
+時点
+時間
+更新
+書
+月
+期
+木
+未満
+末
+本
+本当
+村
+束
+枚
+校
+楽
+様
+様々
+次
+歳
+歴
+段
+毎
+毎日
+気
+水
+求め
+法
+派
+火
+点
+版
+特に
+玉
+用
+男
+町
+界
+略
+百
+的
+目
+相
+県
+確か
+示し
+社
+私
+私達
+秋
+秒
+第
+等
+箇所
+箇月
+簿
+系
+紀
+約
+結局
+続き
+線
+署
+考え
+者
+自体
+自分
+行
+行わ
+見
+見る
+観
+言わ
+計
+話
+話し
+誌
+語っ
+読む
+誰
+課
+調べ
+論
+貴方
+貴方方
+輪
+近く
+述べ
+通
+速報
+連
+週
+道
+達
+違い
+選
+部
+都
+金
+銭
+開か
+間
+関
+関係
+関連
+際
+集
+面
+頃
+類
+首
+高
+！
+！？
+＂
+＃
+＄
+％
+＆
+＇
+（
+）
+＊
+＋
+，
+－
+．
+／
+：
+；
+＜
+＝
+＞
+？
+＠
+［
+＼
+］
+＾
+＿
+｀
+｛
+｜
+｝
+～
+｟
+｠
+｡
+｢
+｣
+､
+･
+￠
+￡
+￢
+￣
+￤
+￥
+￦
+￨
+￩
+￪
+￫
+￬
+￭
+￮
diff --git a/apps/common/src/python/mediawords/languages/lt/lt_stop_words.txt b/apps/common/src/python/mediawords/languages/lt/lt_stop_words.txt
old mode 100755
new mode 100644
index 69707d4e8c..5db1a5f6ef
--- a/apps/common/src/python/mediawords/languages/lt/lt_stop_words.txt
+++ b/apps/common/src/python/mediawords/languages/lt/lt_stop_words.txt
@@ -1,19 +1,74 @@
-#
 # This is a stop word list for the Lithuanian language.
-#
 # Sources:
-#     http://www.filewatcher.com/p/punbb-1.2.16.tbz.620109/www/punbb/lang/Lithuanian/stopwords.txt.html
-#     auto-generated sources
 #
+#     auto-generated sources
+#     http://www.filewatcher.com/p/punbb-1.2.16.tbz.620109/www/punbb/lang/Lithuanian/stopwords.txt.html
+#     https://github.com/stopwords-iso/stopwords-lt/blob/master/stopwords-lt.txt
+
 
 a
+abi
+abidvi
+abiejose
+abiejuose
+abiejų
+abiem
+abigaliai
+abipus
+abu
+abudu
+ai
+ana
+anaiptol
+anaisiais
+anajai
+anajam
+anajame
+anapus
+anas
+anasai
+anasis
+anei
+aniedvi
+anieji
+aniesiems
+anoji
+anojo
+anojoje
+anokia
+anoks
+anosiomis
+anosioms
+anosios
+anosiose
+anot
+ant
+antai
+anuodu
+anuoju
+anuosiuose
+anuosius
+anąja
+anąją
+anąjį
+anąsias
+anųjų
 apie
+aplink
 ar
 arba
+argi
+arti
+aukščiau
 aš
 be
 bei
+beje
+bemaž
+bent
 bet
+betgi
+beveik
 bus
 buvo
 būti
@@ -22,112 +77,474 @@ d
 dabar
 dar
 darbo
+dargi
 daryti
 daug
 daugiau
 daugiausia
+daugmaž
 dažnai
+deja
 dieną
+dėka
 dėl
+dėlei
+dėlto
+ech
+et
+gal
+galbūt
+galgi
 gali
+gan
+gana
 gauna
 gauti
+gi
+greta
+idant
 iki
 ir
+irgi
+it
+itin
 iš
+išilgai
+išvis
+jaisiais
+jajai
+jajam
+jajame
 jam
 jau
 jei
+jeigu
 ji
 jie
+jiedu
+jiedvi
+jieji
+jiesiems
+jinai
 jis
+jisai
 jo
 jog
+joji
+jojo
+jojoje
+jokia
+joks
 jos
+josiomis
+josioms
+josios
+josiose
+judu
+judvi
+juk
+jumis
+jums
+jumyse
+juodu
+juoju
+juosiuose
+juosius
+jus
+jąja
+jąją
+jąsias
 jį
+jįjį
+jūs
+jūsiškis
+jūsiškė
+jūsų
 jų
+jųjų
 kad
 kada
+kadangi
 kai
 kaip
+kaipgi
 kam
 kartą
 kas
+katra
+katras
+katriedvi
+katruodu
+kažin
+kažkas
+kažkatra
+kažkatras
+kažkokia
+kažkoks
+kažkuri
+kažkuris
+kiaurai
+kiek
 kiekvienas
+kieno
+kita
 kitas
+kitokia
+kitoks
 klausimas
 klausti
+kodėl
+kokia
+koks
+kol
+kolei
+kone
 kovo
+kuomet
 kur
+kurgi
+kuri
 kurie
+kuriedvi
 kurios
 kuris
+kuriuodu
 kurių
 labai
+lai
 lietuva
 lietuvoje
 lietuvos
+lig
+ligi
+link
+lyg
 m
 man
+manaisiais
+manajai
+manajam
+manajame
+manas
+manasai
+manasis
+mane
+manieji
+maniesiems
+manim
+manimi
+maniškis
+maniškė
 mano
+manoji
+manojo
+manojoje
+manosiomis
+manosioms
+manosios
+manosiose
+manuoju
+manuosiuose
+manuosius
+manyje
+manąja
+manąją
+manąjį
+manąsias
+manęs
+manųjų
+mat
 mažai
 mažas
+maždaug
 mažiau
+mažne
 mes
 metais
 metu
 metus
 metų
+mudu
+mudvi
+mumis
+mums
+mumyse
+mus
+mūsiškis
+mūsiškė
 mūsų
+na
+nagi
 ne
+nebe
+nebent
 negali
+negi
 negu
 nei
+nejau
+nejaugi
+nekaip
+nelyginant
 nes
 net
+netgi
+netoli
+neva
 niekada
 niekas
 nors
 nuo
+nė
 nėra
 o
+ogi
+oi
+paeiliui
 pagal
+pakeliui
+palaipsniui
+palei
+pas
 pasak
 pasakė
+paskos
+paskui
+paskum
 pat
+pati
+patiems
+paties
+pats
+patys
+patį
+pačiais
+pačiam
+pačiame
+pačiu
+pačiuose
+pačius
+pačių
 per
+pernelyg
+pirm
+pirma
+pirmiau
 po
 prašau
 prie
 prieš
+priešais
+pro
+pusiau
 r
+rasi
 reikia
+rodos
 sakyti
 sakė
+sau
+savaisiais
+savajai
+savajam
+savajame
+savas
+savasai
+savasis
+save
+savieji
+saviesiems
+savimi
+saviškis
+saviškė
 savo
+savoji
+savojo
+savojoje
+savosiomis
+savosioms
+savosios
+savosiose
+savuoju
+savuosiuose
+savuosius
+savyje
+savąja
+savąją
+savąjį
+savąsias
+savęs
+savųjų
+skersai
+skradžiai
+stačiai
 su
+sulig
+ta
+tad
 tai
+taigi
 taip
 taip pat
+taipogi
+taisiais
+tajai
+tajam
+tajame
+tamsta
 tarp
+tarsi
+tartum
+tarytum
 tas
+tasai
+tau
+tavaisiais
+tavajai
+tavajam
+tavajame
+tavas
+tavasai
+tavasis
+tave
+tavieji
+taviesiems
+tavimi
+taviškis
+taviškė
 tavo
+tavoji
+tavojo
+tavojoje
+tavosiomis
+tavosioms
+tavosios
+tavosiose
+tavuoju
+tavuosiuose
+tavuosius
+tavyje
+tavąja
+tavąją
+tavąjį
+tavąsias
+tavęs
+tavųjų
 tačiau
+te
+tegu
+tegul
+tiedvi
+tieji
+ties
+tiesiems
+tiesiog
 tik
 tikrai
+tikriausiai
+tiktai
 to
 todėl
+toji
+tojo
+tojoje
+tokia
+toks
+tol
+tolei
+toliau
+tosiomis
+tosioms
+tosios
+tosiose
+tu
 tuo
+tuodu
+tuoju
+tuosiuose
+tuosius
+turbūt
 turi
 turėjo
+tąja
+tąją
+tąjį
+tąsias
+tūlas
+tųjų
 už
+užtat
+užvis
+va
+vai
 val
+viduj
+vidury
+vien
 vienas
+vienokia
+vienoks
+vietoj
+virš
+viršuj
+viršum
+vis
+vis dėlto
+visa
+visas
+visgi
 visi
+visokia
+visoks
+vos
+vėl
+vėlgi
+ypač
 yra
 čia
 į
+įkypai
+įstrižai
 šalia
 šalies
+še
+ši
+šiaisiais
+šiajai
+šiajam
+šiajame
+šiapus
+šiedvi
+šieji
+šiesiems
+šioji
+šiojo
+šiojoje
+šiokia
+šioks
 šios
-žmonių
+šiosiomis
+šiosioms
+šiosios
+šiosiose
+šis
+šisai
+šit
+šita
+šitas
+šitiedvi
+šitokia
+šitoks
+šituodu
+šiuodu
+šiuoju
+šiuosiuose
+šiuosius
+šiąja
+šiąją
+šiąsias
+šiųjų
+štai
+šįjį
+žemiau
+žmonių
\ No newline at end of file
diff --git a/apps/common/src/python/mediawords/languages/lt/lt_stop_words_old.txt b/apps/common/src/python/mediawords/languages/lt/lt_stop_words_old.txt
new file mode 100755
index 0000000000..69707d4e8c
--- /dev/null
+++ b/apps/common/src/python/mediawords/languages/lt/lt_stop_words_old.txt
@@ -0,0 +1,133 @@
+#
+# This is a stop word list for the Lithuanian language.
+#
+# Sources:
+#     http://www.filewatcher.com/p/punbb-1.2.16.tbz.620109/www/punbb/lang/Lithuanian/stopwords.txt.html
+#     auto-generated sources
+#
+
+a
+apie
+ar
+arba
+aš
+be
+bei
+bet
+bus
+buvo
+būti
+būtų
+d
+dabar
+dar
+darbo
+daryti
+daug
+daugiau
+daugiausia
+dažnai
+dieną
+dėl
+gali
+gauna
+gauti
+iki
+ir
+iš
+jam
+jau
+jei
+ji
+jie
+jis
+jo
+jog
+jos
+jį
+jų
+kad
+kada
+kai
+kaip
+kam
+kartą
+kas
+kiekvienas
+kitas
+klausimas
+klausti
+kovo
+kur
+kurie
+kurios
+kuris
+kurių
+labai
+lietuva
+lietuvoje
+lietuvos
+m
+man
+mano
+mažai
+mažas
+mažiau
+mes
+metais
+metu
+metus
+metų
+mūsų
+ne
+negali
+negu
+nei
+nes
+net
+niekada
+niekas
+nors
+nuo
+nėra
+o
+pagal
+pasak
+pasakė
+pat
+per
+po
+prašau
+prie
+prieš
+r
+reikia
+sakyti
+sakė
+savo
+su
+tai
+taip
+taip pat
+tarp
+tas
+tavo
+tačiau
+tik
+tikrai
+to
+todėl
+tuo
+turi
+turėjo
+už
+val
+vienas
+visi
+yra
+čia
+į
+šalia
+šalies
+šios
+žmonių
diff --git a/apps/common/src/python/mediawords/languages/nl/nl_stop_words.txt b/apps/common/src/python/mediawords/languages/nl/nl_stop_words.txt
index 1ee9a2887d..6ef3790c11 100644
--- a/apps/common/src/python/mediawords/languages/nl/nl_stop_words.txt
+++ b/apps/common/src/python/mediawords/languages/nl/nl_stop_words.txt
@@ -1,108 +1,415 @@
-#
-# This is a stop word list for the Dutch language.
-#
 # Sources:
-#     http://search.cpan.org/~creamyg/Lingua-StopWords-0.09/
 #
+#     http://search.cpan.org/~creamyg/Lingua-StopWords-0.09/
+#     https://github.com/stopwords-iso/stopwords-nl/blob/master/stopwords-nl.txt
+# (Lightly edited to remove words in the original lists that are actually meaningful)
 
 aan
+aangaande
+aangezien
+achte
+achter
+achterna
+af
+afgelopen
 al
+aldaar
+aldus
+alhoewel
+alias
+alle
+allebei
+alleen
 alles
 als
+alsnog
 altijd
+altoos
+ander
 andere
+anders
+anderszins
+beetje
+behalve
+behoudens
+beide
+beiden
 ben
+beneden
+bent
+bepaald
+betreffende
 bij
+bijna
+bijv
+binnen
+binnenin
+blijkbaar
+blijken
+boven
+bovenal
+bovendien
+bovengenoemd
+bovenstaand
+bovenvermeld
+buiten
+bv
 daar
+daardoor
+daarheen
+daarin
+daarna
+daarnet
+daarom
+daarop
+daaruit
+daarvanlangs
 dan
 dat
 de
+deden
+deed
 der
+derde
+derhalve
+dertig
 deze
+dhr
 die
+dikwijls
 dit
 doch
+doe
 doen
+doet
 door
+doorgaand
+drie
+duizend
 dus
+echter
 een
 eens
+eer
+eerdat
+eerder
+eerlang
+eerst
+eerste
+eigen
+eigenlijk
+elk
+elke
 en
+enig
+enige
+enigszins
+enkel
 er
+erdoor
+erg
+ergens
+etc
+etcetera
+even
+eveneens
+evenwel
+gauw
 ge
+gedurende
 geen
+gehad
+gekund
+geleden
+gelijk
+gemoeten
+gemogen
+genoeg
 geweest
+gewoon
+gewoonweg
 haar
+haarzelf
 had
+hadden
+hare
 heb
 hebben
+hebt
+hedden
 heeft
+heel
 hem
+hemzelf
+hen
 het
+hetzelfde
 hier
+hierbeneden
+hierboven
+hierin
+hierna
+hierom
 hij
+hijzelf
 hoe
+hoewel
+honderd
 hun
+hunne
+ieder
+iedere
+iedereen
 iemand
 iets
 ik
+ikzelf
 in
+inderdaad
+inmiddels
+intussen
+inzake
 is
 ja
 je
+jezelf
+jij
+jijzelf
+jou
+jouw
+jouwe
+juist
+jullie
 kan
+klaar
 kon
+konden
+krachtens
+kun
 kunnen
+kunt
+laatst
+later
+liever
+lijken
+lijkt
+maak
+maakt
+maakte
+maakten
 maar
+mag
+maken
 me
 meer
+meest
+meestal
 men
 met
+mevr
+mezelf
 mij
 mijn
+mijnent
+mijner
+mijzelf
+minder
+miss
+misschien
+missen
+mits
+mocht
+mochten
+moest
+moesten
 moet
+moeten
+mogen
+mr
+mrs
+mw
 na
 naar
+nadat
+nam
+namelijk
+nee
+neem
+negen
+nemen
+nergens
+net
+niemand
 niet
 niets
+niks
+noch
+nochtans
 nog
+nogal
+nooit
 nu
+nv
 of
+ofschoon
 om
 omdat
+omhoog
+omlaag
+omstreeks
+omtrent
+omver
+ondanks
 onder
-ons
+ondertussen
+ongeveer
+onszelf
+onze
+onzeker
+ooit
 ook
 op
+opnieuw
+opzij
 over
+overal
+overeind
+overige
+overigens
+paar
+pas
+per
+precies
+recent
 reeds
+rond
+rondom
+samen
+sedert
+sinds
+sindsdien
+slechts
+sommige
+spoedig
+steeds
+tamelijk
 te
-tegen
+tenzij
+terwijl
+thans
+tien
+tiende
+tijdens
+tja
 toch
+toe
 toen
+toenmaals
+toenmalig
 tot
+totdat
+tussen
+twee
+tweede
 u
 uit
+uitgezonderd
 uw
+vaak
+vaakwat
 van
+vanaf
+vandaan
+vanuit
+vanwege
 veel
+veeleer
+veertig
+verder
+verscheidene
+verschillende
+vervolgens
+via
+vier
+vierde
+vijf
+vijfde
+vijftig
+vol
+volgend
+volgens
 voor
+vooraf
+vooral
+vooralsnog
+voorbij
+voordat
+voordezen
+voordien
+voorheen
+voorop
+voorts
+vooruit
+vrij
+vroeg
+waar
+waarom
+waarschijnlijk
+wanneer
 want
 waren
 was
 wat
+we
+wederom
+weer
+weg
+wegens
+weinig
+wel
+weldra
+welk
+welke
 werd
+werden
+werder
 wezen
+whatever
 wie
+wiens
+wier
+wij
+wijzelf
 wil
+wilden
+willen
+word
 worden
 wordt
 zal
 ze
+zei
+zeker
 zelf
+zelfde
+zelfs
+zes
+zeven
 zich
+zichzelf
 zij
 zijn
+zijne
+zijzelf
 zo
+zoals
+zodat
+zodra
 zonder
 zou
+zouden
+zowat
+zulk
+zulke
+zullen
+zult
\ No newline at end of file
diff --git a/apps/common/src/python/mediawords/languages/nl/nl_stop_words_old.txt b/apps/common/src/python/mediawords/languages/nl/nl_stop_words_old.txt
new file mode 100644
index 0000000000..1ee9a2887d
--- /dev/null
+++ b/apps/common/src/python/mediawords/languages/nl/nl_stop_words_old.txt
@@ -0,0 +1,108 @@
+#
+# This is a stop word list for the Dutch language.
+#
+# Sources:
+#     http://search.cpan.org/~creamyg/Lingua-StopWords-0.09/
+#
+
+aan
+al
+alles
+als
+altijd
+andere
+ben
+bij
+daar
+dan
+dat
+de
+der
+deze
+die
+dit
+doch
+doen
+door
+dus
+een
+eens
+en
+er
+ge
+geen
+geweest
+haar
+had
+heb
+hebben
+heeft
+hem
+het
+hier
+hij
+hoe
+hun
+iemand
+iets
+ik
+in
+is
+ja
+je
+kan
+kon
+kunnen
+maar
+me
+meer
+men
+met
+mij
+mijn
+moet
+na
+naar
+niet
+niets
+nog
+nu
+of
+om
+omdat
+onder
+ons
+ook
+op
+over
+reeds
+te
+tegen
+toch
+toen
+tot
+u
+uit
+uw
+van
+veel
+voor
+want
+waren
+was
+wat
+werd
+wezen
+wie
+wil
+worden
+wordt
+zal
+ze
+zelf
+zich
+zij
+zijn
+zo
+zonder
+zou
diff --git a/apps/common/src/python/mediawords/languages/no/no_stop_words.txt b/apps/common/src/python/mediawords/languages/no/no_stop_words.txt
index 2fd8a00993..5949a9c321 100644
--- a/apps/common/src/python/mediawords/languages/no/no_stop_words.txt
+++ b/apps/common/src/python/mediawords/languages/no/no_stop_words.txt
@@ -1,13 +1,17 @@
-#
-# This is a stop word list for the Norwegian language.
-#
 # Sources:
-#     http://search.cpan.org/~creamyg/Lingua-StopWords-0.09/
 #
+#     http://search.cpan.org/~creamyg/Lingua-StopWords-0.09/
+#     https://github.com/stopwords-iso/stopwords-no/blob/master/stopwords-no.txt
+# (Lightly edited to remove words in the original lists that are actually meaningful)
 
+å
 alle
+andre
+arbeid
 at
 av
+både
+båe
 bare
 begge
 ble
@@ -15,9 +19,10 @@ blei
 bli
 blir
 blitt
-både
-båe
+bort
+bruke
 da
+då
 de
 deg
 dei
@@ -39,7 +44,6 @@ ditt
 du
 dykk
 dykkar
-då
 eg
 ein
 eit
@@ -47,15 +51,26 @@ eitt
 eller
 elles
 en
+ene
+eneste
+enhver
 enn
 er
 et
 ett
 etter
+få
+folk
 for
+før
 fordi
+forsûke
 fra
-før
+fûr
+gå
+gjorde
+gjøre
+god
 ha
 hadde
 han
@@ -84,11 +99,11 @@ hvorfor
 i
 ikke
 ikkje
-ikkje
 ingen
 ingi
 inkje
 inn
+innen
 inni
 ja
 jeg
@@ -104,8 +119,15 @@ kvarhelst
 kven
 kvi
 kvifor
+lage
+lang
+lik
+like
+må
+makt
 man
 mange
+måte
 me
 med
 medan
@@ -113,13 +135,21 @@ meg
 meget
 mellom
 men
+mens
+mer
+mest
 mi
 min
 mine
 mitt
 mot
+mye
 mykje
+nå
+når
+navn
 ned
+nei
 no
 noe
 noen
@@ -128,8 +158,7 @@ noko
 nokon
 nokor
 nokre
-nå
-når
+ny
 og
 også
 om
@@ -137,35 +166,47 @@ opp
 oss
 over
 på
+part
+punkt
+så
 samme
+sånn
+sant
 seg
 selv
 si
-si
 sia
 sidan
 siden
 sin
 sine
+sist
 sitt
 sjøl
 skal
 skulle
 slik
+slutt
 so
 som
-som
 somme
 somt
-så
-sånn
+start
+stille
+tid
 til
+tilbake
+tilstand
 um
+under
 upp
 ut
 uten
 var
+vår
+være
 vart
+vært
 varte
 ved
 vere
@@ -173,11 +214,9 @@ verte
 vi
 vil
 ville
+vite
 vore
+vöre
 vors
 vort
-vår
-være
-være
-vært
-å
+vört
\ No newline at end of file
diff --git a/apps/common/src/python/mediawords/languages/no/no_stop_words_old.txt b/apps/common/src/python/mediawords/languages/no/no_stop_words_old.txt
new file mode 100644
index 0000000000..2fd8a00993
--- /dev/null
+++ b/apps/common/src/python/mediawords/languages/no/no_stop_words_old.txt
@@ -0,0 +1,183 @@
+#
+# This is a stop word list for the Norwegian language.
+#
+# Sources:
+#     http://search.cpan.org/~creamyg/Lingua-StopWords-0.09/
+#
+
+alle
+at
+av
+bare
+begge
+ble
+blei
+bli
+blir
+blitt
+både
+båe
+da
+de
+deg
+dei
+deim
+deira
+deires
+dem
+den
+denne
+der
+dere
+deres
+det
+dette
+di
+din
+disse
+ditt
+du
+dykk
+dykkar
+då
+eg
+ein
+eit
+eitt
+eller
+elles
+en
+enn
+er
+et
+ett
+etter
+for
+fordi
+fra
+før
+ha
+hadde
+han
+hans
+har
+hennar
+henne
+hennes
+her
+hjå
+ho
+hoe
+honom
+hoss
+hossen
+hun
+hva
+hvem
+hver
+hvilke
+hvilken
+hvis
+hvor
+hvordan
+hvorfor
+i
+ikke
+ikkje
+ikkje
+ingen
+ingi
+inkje
+inn
+inni
+ja
+jeg
+kan
+kom
+korleis
+korso
+kun
+kunne
+kva
+kvar
+kvarhelst
+kven
+kvi
+kvifor
+man
+mange
+me
+med
+medan
+meg
+meget
+mellom
+men
+mi
+min
+mine
+mitt
+mot
+mykje
+ned
+no
+noe
+noen
+noka
+noko
+nokon
+nokor
+nokre
+nå
+når
+og
+også
+om
+opp
+oss
+over
+på
+samme
+seg
+selv
+si
+si
+sia
+sidan
+siden
+sin
+sine
+sitt
+sjøl
+skal
+skulle
+slik
+so
+som
+som
+somme
+somt
+så
+sånn
+til
+um
+upp
+ut
+uten
+var
+vart
+varte
+ved
+vere
+verte
+vi
+vil
+ville
+vore
+vors
+vort
+vår
+være
+være
+vært
+å
diff --git a/apps/common/src/python/mediawords/languages/pt/pt_stop_words.txt b/apps/common/src/python/mediawords/languages/pt/pt_stop_words.txt
index d49861eea5..964c0d13d1 100644
--- a/apps/common/src/python/mediawords/languages/pt/pt_stop_words.txt
+++ b/apps/common/src/python/mediawords/languages/pt/pt_stop_words.txt
@@ -1,11 +1,11 @@
-#
-# This is a "long" stop word list for the Portuguese language.
-#
+# (Lightly edited to remove words in the original lists that are actually meaningful)
 # Sources:
-#     http://search.cpan.org/~creamyg/Lingua-StopWords-0.09/
 #
+#     http://search.cpan.org/~creamyg/Lingua-StopWords-0.09/
+#     https://github.com/stopwords-iso/stopwords-pt/blob/master/stopwords-pt.txt
 
 a
+à
 a meta
 abaixo
 abastecimento
@@ -22,18 +22,17 @@ abrir
 abriu
 absoluta
 absolutamente
-absurdo
-abuso
 acaba
 acabam
 acabar
 acabaram
 acabou
-academia
+ação
 acaso
 aceita
 aceitar
 aceitou
+acerca
 acertar
 acertou
 acesso
@@ -43,8 +42,6 @@ achar
 achei
 acho
 achou
-acidente
-acidentes
 acima
 acompanha
 acompanhada
@@ -60,144 +57,52 @@ acontecerá
 aconteceu
 acontecido
 acontecimentos
-acordo
 acredita
 acreditam
 acreditar
 acredito
 acrescenta
 acrescentou
-acumulado
-acusado
-acusados
-acusação
-acusações
-adequada
-adequado
-adesão
+adeus
 adianta
 adiante
 adiantou
-administrador
-administrar
-administrativa
-administrativo
-administração
-admite
-admitiu
-adolescente
-adolescentes
-adotar
-adoção
-adquirir
-adultos
-adversário
-adversários
-advogada
-advogado
-advogados
-aeroporto
 afastado
 afastar
 afinal
-afirma
-afirmam
 afirmando
 afirmar
 afirmou
-agenda
-agente
-agentes
 agir
 agora
 agosto
 agradecer
-agressão
-agricultores
-agricultura
-agrícola
 aguarda
 aguardar
-agência
-agências
+águas
 ah
+aí
 ainda
-ajuda
-ajudam
-ajudar
-ajudou
 ala
 alcançar
 alega
 alegou
-alegre
-alegria
-alemão
+alem
+além
 alerta
+algmas
 algo
+alguém
 algum
 alguma
 algumas
 alguns
-alguém
 ali
-aliado
-aliados
-aliança
-alimentar
-alimentação
-alimento
-alimentos
 aliás
-alma
-almoço
-alta
-altas
-alteração
-alterações
-alternativa
-alternativas
-alto
-altos
-altura
-aluguel
-aluno
-alunos
 alves
-alvinegro
-alvo
-além
-ama
 amanhã
-amarelo
 ambas
-ambientais
-ambiental
-ambiente
 ambos
-ameaça
-ameaças
-americano
-americanos
-amiga
-amigo
-amigos
-amizade
-amor
-ampla
-ampliar
-ampliação
-amplo
-analisa
-analisar
-analistas
-anda
-andamento
-andar
-animais
-animal
-animação
-aniversário
 ano
 anos
 ante
@@ -212,151 +117,63 @@ anual
 anuncia
 anunciado
 anunciar
-anunciou
-análise
 anúncio
+anunciou
 ao
+aonde
 aos
 aparece
 aparecem
 aparecer
 apareceu
 aparecida
-aparelho
-aparelhos
-apartamento
-apelo
 apenas
 apesar
+aplicação
 aplicada
 aplicado
 aplicar
-aplicação
 apoia
 apoiar
 apoio
 aponta
 apontam
+apontar
 apontou
-aposentado
-aposentadoria
-aposentados
-aposta
-apreensão
-aprender
-aprendizado
-apresenta
-apresentada
-apresentadas
-apresentado
-apresentados
-apresentam
-apresentar
-apresentaram
-apresentação
-apresentações
-apresentou
-aprovada
-aprovado
-aprovados
-aprovar
-aprovação
-aproveitar
-aproveitou
+apos
+após
 aproximadamente
 apuração
-após
 aquela
 aquelas
 aquele
 aqueles
 aqui
 aquilo
-aquisição
 ar
-areia
-arena
-argumento
-argumentos
-arma
-armado
-armas
-arrecadação
-arroz
-arruda
-art
-arte
-artes
-artigo
-artigos
-artilheiro
-artista
-artistas
+área
+áreas
 as
-asfalto
-aspecto
-aspectos
-assaltantes
-assalto
-assassinato
-assembleia
-assessor
-assessores
-assessoria
+às
 assim
-assinado
-assinar
-assinatura
-assinou
-assistente
-assistir
-assistência
-associados
-associação
-associações
 assume
 assumir
 assumiu
 assunto
 assuntos
 at
-atacante
-atacar
-ataque
-ataques
+até
 atende
 atendendo
 atender
 atendidas
-atendido
-atendidos
-atendimento
-atendimentos
-atento
-atenção
-atinge
-atingido
-atingir
-atingiu
-atitude
-atitudes
-ativa
-atividade
-atividades
-atleta
-atletas
 ato
-ator
-atores
 atos
 atrair
+atrás
 atraso
 através
-atração
-atrações
-atriz
-atrás
-atua
+atuação
 atuais
 atual
 atualizado
@@ -364,102 +181,27 @@ atualmente
 atuam
 atuando
 atuar
-atuação
 atuou
-até
-auditório
-audiência
-aula
-aulas
 aumenta
 aumentando
 aumentar
 aumento
 aumentou
-ausência
-automóveis
-automóvel
-autonomia
-autor
-autores
-autoria
-autoridade
-autoridades
-autorização
-autos
-auxiliar
-auxílio
-avalia
-avaliar
-avaliação
 avaliou
-avançar
-avanço
-avanços
-avançou
-avenida
-avisa
-avião
-avó
-azul
-ação
-ações
-aérea
-aí
-baiano
-baile
-bairro
-bairros
-baixa
-baixo
-baixos
-balanço
-bancada
-banco
-bancos
-banda
-bandas
-bandeira
-bandido
-bandidos
-banheiro
-banho
-bar
-barato
-barco
-bares
-barra
-barreiras
-barros
-barulho
 base
 baseado
 bases
+básica
+básicas
+básico
 basta
 bastante
-bastidores
-batalha
-batalhão
-bate
-bater
-bateria
-bateu
-beber
-bebida
-bebidas
-bebê
-beira
-bela
-beleza
-belo
 bem
 beneficiar
 benefício
 benefícios
 bens
 bernardo
-biblioteca
-bicicleta
 bilhão
 bilhões
 bloco
@@ -467,175 +209,28 @@ blocos
 blog
 boa
 boas
-boca
-bola
-boletim
-bolsa
-bolsas
-bolso
 bom
-bomba
-bombeiros
-bonita
-bonito
 bons
-branca
-branco
-brancos
-brasileiras
-brasileiro
-brasileiros
-braço
-braços
-breve
-briga
-brilhante
-brincadeira
-brincar
-brinquedos
-bruto
-buraco
-buracos
-busca
-buscam
-buscando
-buscar
-básica
-básicas
-básico
+cá
 cabe
 cabeceou
-cabelo
-cabelos
-cabeça
-cabo
-cachorro
 cada
-cadastro
-cadeia
-cadeira
-cadeiras
 cadê
-cai
-cair
-caiu
-caixa
-caixas
-calendário
-calma
-calor
-calçada
-cama
-caminhada
-caminho
-caminhos
-caminhão
-caminhões
-camisa
-campanha
-campanhas
-campeonato
-campeão
-campeões
-campo
-campos
-campus
-cana
-canal
-candidata
-candidato
-candidatos
-candidatura
-candidaturas
-canto
-cantor
-cantora
-caos
-capa
-capacidade
-capacitação
-capaz
-capazes
-capitais
-capital
-capitão
-capixaba
-capítulo
-cara
-característica
-características
-caras
-carga
-cargo
-cargos
-carinho
-carioca
-carne
-caro
-carreira
-carro
-carros
-carta
-cartas
-carteira
-cartão
-cartório
-cartões
-caruaru
-caráter
-casa
-casado
-casal
-casamento
-casar
-casas
-caso
-casos
-cassado
-cassação
-castelo
 catarinense
 categoria
 categorias
-causa
-causar
-causas
-causou
-caíram
-cd
-cedo
-celular
-cem
-cemitério
-cena
+cenário
 cenas
 centenas
 cento
 centrais
 central
-centro
-centros
-cenário
-cerca
-cerimônia
-certa
-certamente
-certas
-certeza
-certo
-certos
-cerveja
-chama
 chamada
 chamado
 chamados
 chamar
 chamou
-chance
-chances
-chapa
-chave
-chefe
+chão
 chega
 chegada
 chegam
@@ -645,204 +240,59 @@ chegar
 chegaram
 chegou
 cheguei
-cheia
-cheio
-cheiro
-cheque
-choque
-chute
-chutou
-chuva
-chuvas
-chão
-ciclo
-cidadania
-cidade
-cidades
-cidadão
-cidadãos
-cientistas
 cima
 cinco
-cinema
-circo
-circuito
-circulação
-cirurgia
 cita
 citado
 citar
 citou
-civil
-civis
-ciência
-ciências
-classe
-classes
-classificação
-cliente
-clientes
-clima
-clique
-clube
-clubes
-clássico
-clínica
-cobertura
-cobra
-cobrança
-cobrar
-cobrou
-cofres
 coisa
 coisas
 colaboradores
-colega
-colegas
-coleta
-coletiva
-coletivo
-coleção
-coligação
 coloca
+colocação
 colocada
 colocado
 colocados
 colocando
 colocar
-colocação
 colocou
 coloque
-colorado
-coluna
-colunista
-colégio
 com
-comandante
-comando
-combate
-combater
-combustível
-comecei
-comemora
-comemorar
-comemoração
-comemorou
-comenta
-comentar
-comentou
-comentário
-comentários
-comer
-comerciais
-comercial
-comercialização
-comerciante
-comerciantes
 começa
 começam
 começando
 começar
 começaram
+comecei
 começo
 começou
-comida
+comenta
+comentar
+comentário
+comentários
+comentou
 comigo
-comissão
-comitê
 como
-companheiro
-companheiros
-companhia
-companhias
-comparação
-competente
-competição
-competições
-competência
 complementar
-completa
 completamente
-completar
-completo
-completou
-complexo
-complicado
-compor
-comportamento
-composição
-composta
-composto
-compra
-comprar
-compras
-compreensão
-compromisso
-compromissos
-comprou
-computador
-computadores
-comum
-comunicado
-comunicação
-comunidade
-comunidades
-comuns
-comércio
-conceito
-conceitos
-concentração
-concessão
-conclui
-concluir
-concluiu
-conclusão
-concorda
 concordo
-concorrentes
-concorrer
-concorrência
-concreto
-concurso
-concursos
-condenado
-condenação
-condição
-condições
-conduta
-confederação
 conferir
-conferência
 confiança
 confira
 confirma
 confirmado
 confirmar
 confirmou
-conflito
-conflitos
 conforme
-conforto
-confronto
-confusão
 conhece
 conhecem
 conhecer
 conheceu
 conhecida
 conhecido
-conhecidos
-conhecimento
-conhecimentos
 conheço
 conjunto
-conquista
-conquistar
-conquistas
-conquistou
-consciente
-conscientização
-consciência
 consegue
 conseguem
 consegui
@@ -851,45 +301,21 @@ conseguimos
 conseguir
 conseguiram
 conseguiu
-conselheiro
-conselho
-conselhos
-consenso
-conservação
 considera
+consideração
 considerada
 considerado
 considerados
 considerando
 considerar
-consideração
 considerou
 consigo
 consta
-constante
-constantes
-constitucional
-construir
-construção
-construída
-construído
-consulta
-consultas
-consultoria
-consumidor
-consumidores
-consumo
-consórcio
-conta
-contam
-contando
 contar
 contará
-contas
 contato
 contatos
 conter
-contexto
 conteúdo
 continua
 continuam
@@ -900,492 +326,139 @@ continuidade
 continuou
 contou
 contra
-contrapartida
-contratado
-contratados
-contratar
-contratação
-contratações
-contrato
-contratos
-contribuinte
-contribuir
-contribuição
-controlar
-controle
 contrário
 contudo
-convencer
-convenção
-conversa
-conversar
-conversas
-convidado
-convidados
-convite
-conviver
-convivência
-convênio
-cooperativa
-coordenador
-coordenadora
-coordenação
-cor
-coragem
-coração
-cores
-coronel
-corpo
-corpos
-corre
-correndo
-corrente
-correr
-correta
-correto
-correção
-corrida
-cortar
-corte
-cortes
 costas
 costuma
 costumam
 cotidiano
-cozinha
-credibilidade
-creio
-cresce
-crescendo
-crescente
-crescer
-cresceu
-crescimento
-cria
-criada
 criado
 criando
-crianças
 criar
-criatividade
-criação
-crime
-crimes
-criminal
-criminalidade
-criminosos
 criou
-crise
-criticar
-criticou
-critério
-critérios
-cruzamento
-cruzes
-cruzou
-crédito
-créditos
-crítica
-críticas
-crítico
-cuidado
-cuidados
-cuidar
 cuja
+cujas
 cujo
-culpa
-cultura
-culturais
-cultural
-cumprimento
-cumprir
-cumpriu
-cunha
-currículo
-curso
-cursos
-curto
-custa
-custo
-custos
-cá
-cães
-cérebro
-céu
-código
-cúpula
-da
-dada
-dado
+cujos
 dados
+daí
+dança
 dando
-danos
 dantas
-dança
 daquela
+daquelas
 daquele
 daqueles
 daqui
-dar
-daria
-dará
-das
-data
-dava
-daí
 de
-de deus
-debaixo
-debate
-debates
-decide
-decidir
-decidiu
-decisão
-decisões
-declaração
-declarações
-declarou
-decoração
-decreto
-dedicação
-defende
-defender
-defendeu
-defensor
-defesa
-deficiência
-define
-definida
-definido
-definir
-definitivamente
-definiu
-definição
-deixa
 deixado
-deixam
-deixando
-deixar
-deixaram
-deixe
-deixou
-dela
-delas
-dele
-delegacia
-delegado
-deles
-demais
-demanda
-demandas
-demissão
-democrático
-demonstra
-demonstrou
-demora
-demorou
 dentre
 dentro
-denunciar
-denúncia
-denúncias
-departamento
-depende
 dependendo
 depender
-depoimento
-depoimentos
 depois
-deputada
-deputado
-deputados
-der
-deram
-derrota
-derrotado
-desafio
-desafios
-descoberta
-descobre
-descobrir
-descobriu
-desconto
-desculpas
 desde
-deseja
-desejar
-desejo
-desembargador
-desempenho
-desemprego
-desenvolver
-desenvolvido
-desenvolvimento
-desespero
-desfile
-despesas
+desligado
 dessa
 dessas
 desse
 desses
 desta
-destaca
-destacar
-destacou
-destaque
-destaques
 destas
 deste
 destes
-destinado
-destinados
-destino
-desvio
-desviou
-detalhe
-detalhes
-determina
-determinado
-determinação
-determinou
 deu
 deve
 deve-se
 devem
 devemos
 dever
-deveria
-deveriam
 deverá
 deverão
+deveria
+deveriam
 devia
 devido
 dez
+dezanove
+dezasseis
+dezassete
 dezembro
 dezenas
+dezoito
 dia
 dia-a-dia
-diagnóstico
 diante
+diária
 diariamente
+diárias
+diário
 dias
 dica
 dicas
-diferente
-diferentes
 diferença
 diferenças
+diferente
+diferentes
+difíceis
+difícil
 dificilmente
 dificuldade
 dificuldades
-difíceis
-difícil
 diga
 digital
 dignidade
 digo
 diminuir
-diminuição
-dinheiro
-diploma
+direção
+direita
 direito
 direta
 diretamente
-direto
-diretor
-diretora
-diretores
-diretoria
-diretório
-direção
 diria
-dirigente
-dirigentes
-dirigir
 disciplina
-disco
-discurso
-discussão
-discussões
-discutir
+dispõe
+dispoem
 disponíveis
 disponível
-disposição
 disposto
-disputa
-disputar
-dispõe
 disse
 disseram
 disso
-distante
-distribuição
-distribuídos
 distrito
 dito
-diversas
 diversos
-divisão
-divulgada
-divulgado
-divulgados
-divulgar
-divulgação
-divulgou
 diz
 dizem
 dizendo
 dizer
 dizia
-diálogo
-diária
-diárias
-diário
 do
-doação
-doações
-doce
-documentação
-documento
-documentos
-doente
-doença
-doenças
 dois
 domingo
-domínio
-dona
-dono
-donos
-dor
-dores
-dormir
 dos
-dose
-doutor
+doze
 duas
-duelo
-dupla
-dura
-durante
 duração
-duro
+durante
 durou
 dutra
-dvd
-dão
-década
-décadas
-déficit
-dívida
-dívidas
-dólar
-dólares
-dúvida
-dúvidas
 e
-e-mail
-economista
-econômica
-econômicas
-econômico
-econômicos
-edital
-edição
-edições
-educacional
+é
 efeito
 efeitos
-efetivamente
-efetivo
-eficiente
-eficiência
 eis
 ela
-elaboração
 elas
 ele
-eleger
-elegeu
-eleita
-eleito
-eleitor
-eleitorado
-eleitorais
-eleitores
-eleitos
-eleição
-eleições
-elementos
-elenco
 eles
-eletrônica
-eletrônico
-elevado
-elevação
-elite
-elogios
-elétrica
 em
 embora
-emenda
-emendas
-emergência
-emissora
-emissoras
-emissão
-emocional
-emoção
-emoções
-empate
-empatou
-empenho
-empreendimento
-empreendimentos
-empregados
-emprego
-empregos
-empresa
-empresarial
-empresas
-empresário
-empresários
-empréstimo
-empréstimos
-encaminhado
-encaminhados
-encarar
-encerramento
-encerrou
-enchentes
-encontra
-encontrada
-encontrado
-encontrados
-encontram
-encontrar
-encontraram
-encontro
-encontros
-encontrou
-endereço
-energia
-enfatizou
 enfim
-enfrenta
-enfrentar
-engenharia
-engenheiro
-enorme
 enquanto
-ensinar
-ensino
 entanto
-entende
-entender
-entendeu
-entendimento
+entao
+então
 entendo
 entidade
 entidades
@@ -1395,117 +468,41 @@ entram
 entrar
 entraram
 entre
-entrega
-entregar
-entregou
-entregue
-entregues
-entretanto
-entrevista
-entrevistados
-entrevistas
-entrou
-então
-enviado
-enviar
-enviou
-envolve
-envolvendo
-envolvido
-envolvidos
-envolvimento
-episódio
-equilíbrio
-equipamento
-equipamentos
-equipe
-equipes
-equivalente
 era
 eram
-errado
-erro
-erros
-escala
-escanteio
-esclarecer
-escola
-escolar
-escolas
-escolha
-escolher
-escolheu
-escolhido
-esconder
-escrever
-escreveu
-escrita
-escrito
-escritor
-escritório
-esforço
-esforços
-espanhol
-espaço
-espaços
-especiais
-especial
-especialista
-especialistas
-especializada
+éramos
+és
 especialmente
 específica
 específico
 espera
 esperado
 esperamos
-esperando
 esperança
+esperando
 esperar
 esperava
 espero
-espetáculo
-espiritual
-esportiva
-esportivo
-esposa
-espécie
-espécies
-espírito
-esquecer
-esquema
 esquerdo
 esquina
 essa
 essas
 esse
-essencial
 esses
 esta
-estabelece
-estabelecer
-estabelecimento
-estabelecimentos
-estabilidade
-estacionamento
-estado
-estados
-estaduais
-estadual
+está
 estamos
+estão
 estar
-estaria
-estariam
 estará
 estarão
+estaria
+estariam
 estas
-estatal
-estatuto
-estatística
-estatísticas
+estás
 estava
 estavam
-estação
+estávamos
 este
 esteja
 estejam
@@ -1514,176 +511,62 @@ estes
 esteve
 estilo
 estimativa
-estimular
 estive
 estivemos
 estiver
 estivera
 estiveram
+estivéramos
 estiverem
 estivermos
 estivesse
 estivessem
-estivéramos
 estivéssemos
+estiveste
+estivestes
 estou
-estrada
-estradas
-estrangeiros
-estranho
-estratégia
-estratégias
-estreia
-estrela
-estrelas
-estrutura
-estudar
-estudo
-estudos
-está
-estádio
-estágio
-estávamos
-estão
-etapa
-etapas
 etc
 etc.
 eu
-evento
-eventos
-eventual
-evidente
-evitar
-evolução
-ex-deputado
-ex-governador
-ex-prefeito
-ex-presidente
-exame
-exames
 exatamente
-excelente
-excelência
-excesso
-exceção
 exclusivamente
-executiva
-executivo
-execução
-exemplo
-exemplos
-exercer
-exercício
-exercícios
-exige
-exigir
-exigência
-exigências
-existe
-existem
-existentes
-existia
-existir
-existência
-expandir
-expansão
-expectativa
-expectativas
-experiência
-experiências
 explica
-explicar
 explicação
 explicações
+explicar
 explicou
-exploração
-exportações
-exposição
-expressão
-expulso
-extensão
-exterior
-externa
 extra
 extremamente
-exército
+façam
 face
-facilidade
+fácil
 facilitar
 facilmente
-faculdade
+faço
 faixa
-faixas
-fala
-falam
-falando
-falar
 falei
-falha
-falhas
 falou
-falta
-faltam
-faltando
-faltou
-fama
-familiar
-familiares
-famoso
-famílias
-faria
-farmácia
-farroupilha
 fará
-farão
-fase
-fato
-fator
-fatores
-fatos
-faturamento
-favor
-favorável
+faria
 faz
+fazeis
 fazem
 fazemos
 fazenda
 fazendo
 fazer
+fazes
 fazia
-faça
-façam
-faço
-febre
 fechada
 fechado
 fechados
 fechamento
 fechar
 fechou
-federais
-federal
-federação
-feira
 feita
 feitas
 feito
 feitos
-felicidade
-feliz
-felizes
-feminina
-feminino
-fenômeno
-feriado
-feridos
-ferramenta
-ferramentas
-ferro
-festa
-festas
-festival
 fevereiro
 fez
 fica
@@ -1691,852 +574,143 @@ ficam
 ficamos
 ficando
 ficar
-ficaram
-ficaria
 ficará
+ficaram
 ficarão
+ficaria
 ficava
-ficha
 fico
 ficou
-fiel
-figura
-fila
-filha
-filhas
-filho
-filhos
-filme
-filmes
-filosofia
-fim
-finais
 final
-finalidade
 finalizou
 finalmente
-financeira
-financeiras
-financeiro
-financeiros
-financiamento
-finanças
 fins
 fique
 fiquei
-firme
-fiscais
-fiscal
-fiscalizar
-fiscalização
 fiz
 fizemos
 fizeram
-fiéis
-flagrante
-flor
-flores
-floresta
-fluxo
 foco
-fogo
 foi
-folha
 fomos
-fonte
-fontes
 for
-fora
 foram
+fôramos
+forças
 forem
-forma
-formada
-formado
-formar
-formas
-formato
-formação
 formos
-forró
-fortalecer
-forte
-força
-forças
 fosse
 fossem
-foto
-fotos
-fraco
-francês
-frase
-fraude
-freitas
-frente
-frio
-frisou
-fronteira
-frota
-frutas
-fruto
-frutos
-fuga
-fugir
-fugiu
-fui
-funciona
-funcionamento
-funcionando
-funcionar
-funcionário
-funcionários
-fundamentais
-fundamental
-fundação
-fundo
-fundos
-função
-funções
-furto
-futebol
-futsal
-futuro
-futuros
-fábrica
-fácil
-fãs
-fé
-férias
-física
-físicas
-físico
-fórmula
-fórum
-fôramos
 fôssemos
-gabinete
-gado
-galeria
-ganha
-ganham
-ganhando
-ganhar
-ganho
-ganhos
-ganhou
-garante
-garantia
-garantir
-garantiu
-garota
-garoto
-gastar
-gasto
-gastos
-gaúcha
-gaúchos
-general
-gente
-geografia
-gera
-gerais
-geral
-geralmente
-gerando
-gerar
-geração
-gerente
-gerou
-gestor
-gestores
-gestão
-ginásio
-global
-gol
-goleiro
-golpe
-gols
-gosta
-gostam
+foste
+fostes
+fui
 gostaria
-gostei
-gosto
-gostou
-governador
-governadora
-governadores
-governantes
-governar
-governo
-governos
-gramado
-grande
-grandes
-gratuita
-gratuito
-grau
-grave
-graves
-graças
-grossa
-grosso
-grupo
-grupos
-grãos
-guarda
-guia
-gás
-gênero
-habitantes
-habitação
+ha
+há
 haja
 hajam
 hajamos
+hão
 harmonia
 havemos
 haver
-haveria
 haverá
+haveria
 havia
 haviam
-hectares
-hei
-helena
-hipótese
-história
-histórias
-histórica
-histórico
 hoje
-homem
-homenagem
-homens
-homicídio
-homicídios
-honra
 hora
-horas
-horizonte
-horário
-horários
-hospital
-hotel
-hotéis
 houve
 houvemos
 houver
 houvera
+houverá
 houveram
+houvéramos
+houverão
 houverei
 houverem
 houveremos
 houveria
 houveriam
-houvermos
-houverá
-houverão
 houveríamos
+houvermos
 houvesse
 houvessem
-houvéramos
 houvéssemos
-hugo
-humana
-humanidade
-humano
-humanos
-humor
-há
-hábito
-hão
 ia
 ibope
 ida
-idade
-ideal
-identidade
-identificado
-identificar
-identificação
-idosos
-idéia
-idéias
-iguais
-igual
-igualdade
-ilegal
-ilha
-iluminação
-imagem
-imagens
-imaginar
-imediata
-imediatamente
-imediato
-impacto
-impede
-impedir
-implantar
-implantação
-impor
-importa
-importante
-importantes
-impossível
-imposto
-impostos
-imprensa
-impressão
-imóveis
-imóvel
-inauguração
-incentivar
-incentivo
-inclui
-incluindo
-inclusive
-inclusão
-incrível
-incêndio
-indenização
-independente
-independentemente
-independência
-indica
-indicado
-indicar
-indicação
-individuais
-individual
-indivíduo
-indivíduos
 indo
-industrial
-indícios
-indígena
-indígenas
-indústria
-indústrias
-infantil
-infelizmente
-inferior
-influência
-informa
-informado
-informar
-informação
-informações
-informou
-informática
-infra-estrutura
-infraestrutura
-inglês
-ingresso
-ingressos
-inicia
-iniciada
-inicial
-inicialmente
-iniciar
-iniciativa
-iniciativas
-iniciou
-inquérito
-inscritos
-inscrição
-inscrições
-instalada
-instalar
-instalação
-instalações
-institucional
-instituição
-instituições
-instituto
-instrumento
-instrumentos
-integra
-integral
-integrante
-integrantes
-integrar
-integração
-inteira
-inteiro
-inteligente
-inteligência
-intensa
-intenso
-intenção
-intenções
-inter
-interessa
-interessados
-interessante
-interesse
-interesses
-interior
-interna
-internacionais
-internacional
-internado
-internet
-interno
-interpretação
-intervalo
-intervenção
-intuito
-invadiu
-inverno
-investidores
-investigar
-investigação
-investigações
-investimento
-investimentos
-investir
 invés
-início
-inúmeras
-inúmeros
 ir
-iria
-irmã
-irmão
-irmãos
-irregular
-irregularidades
 irá
 irão
+iria
 isso
+ista
+iste
 isto
-italiano
 item
 itens
+já
 jamais
 janeiro
-janela
-jantar
-jardim
-jc
-jeito
-joga
-jogada
-jogadas
-jogador
-jogadores
-jogando
-jogar
-jogo
-jogos
-jogou
-jornada
-jornais
-jornal
-jornalismo
-jornalistas
-judicial
-judiciário
-juiz
-julgamento
-julgar
 julho
 junho
-juntamente
-junto
-juntos
-juros
-jurídica
-jurídico
-justa
-justamente
-justifica
-justificar
-justificativa
-justiça
-justo
-juventude
-juíza
-juízes
-juízo
-já
 km
-laboratório
-lado
-lados
-ladrões
-lago
-lamentável
-lance
-lança
-lançado
-lançamento
-lançar
-lançou
-lar
-larga
-lateral
-latina
-lazer
-leal
-legais
-legal
-legenda
-legislativa
-legislativo
-legislação
-lei
-leia
-leilão
-leis
-leite
-leitor
-leitores
-leitura
-lembra
-lembrando
-lembrar
-lembro
-lembrou
-ler
-leste
-lesão
-letra
-letras
-leva
-levada
-levado
-levados
-levam
-levando
-levantamento
-levantar
 levar
 levaram
-leve
 levou
 lhe
 lhes
-li
-liberado
-liberação
-liberdade
-licença
-licitação
-lidar
-liderança
-lideranças
-liga
-ligada
-ligadas
-ligado
-ligados
-ligar
-ligação
-ligações
-liminar
-limite
-limites
-limpa
-limpeza
-linda
-linguagem
-linha
-linhas
-lista
-literatura
-litoral
-litros
-livre
-livres
-livro
-livros
-lixo
-lição
-locais
-local
-localidade
-localizada
-localizado
 logo
-loja
-lojas
-longa
-longe
-longo
-lua
-lucro
-lucros
-lugar
-lugares
-luta
-lutar
-luxo
-luz
-lá
-lê
-líder
-líderes
-língua
-líquido
-lógica
-madeira
-madrugada
 maio
-maior
-maiores
-maioria
-mais
-mal
-manda
-mandado
-mandar
-mandato
-mandatos
-mandou
-maneira
-manhã
-manifestação
-mano
-manter
-manteve
-mantido
-mantém
-manutenção
-mar
-marca
-marcada
-marcado
-marcador
-marcar
-marcas
-marcação
-marcou
-margem
-margens
-marido
-marinho
-marketing
-março
 mas
-masculino
-massa
-mata
-matar
-matemática
-materiais
-material
-mato
-matou
-matriz
-matéria
-matérias
-mau
-mauro
-maus
 me
-medalha
 mediante
-medida
-medidas
-medo
-meia
 meio
-meio-campo
-meios
-melhor
-melhora
-melhorar
-melhores
-melhoria
-melhorias
-melhorou
-membro
-membros
-memória
-menina
-meninas
-menino
-meninos
-menor
-menores
 menos
-mensagem
-mensagens
-mensais
-mensal
-mental
-mente
-mentira
-mercado
-mercadorias
-mercados
-merece
-merecem
-mesa
+mês
 meses
 mesma
 mesmas
 mesmo
 mesmos
-mestre
-meta
-metade
-metas
-metropolitana
 metros
 meu
 meus
 mil
-milhares
-milho
 milhão
-milhões
-militar
-militares
+milhares
 mim
-mineiro
-minha
 minhas
-ministra
-ministros
-minuto
-minutos
-mirim
-missão
-mistura
-mobilização
-moda
-modalidade
-modelo
-modelos
-moderna
-moderno
 modo
-moeda
 momento
 momentos
-montagem
 montante
-montar
-monte
-mora
-moradia
-morador
-moradora
-moradores
-morais
-moral
-moram
-morar
-morava
-moro
-morre
-morrer
-morreram
-morreu
-morro
-morte
-mortes
-morto
-mortos
 mostra
 mostram
 mostrando
 mostrar
 mostrou
-motivo
-motivos
-moto
-motor
-motoristas
-motos
-movimentação
-movimento
-movimentos
-moça
-muda
-mudança
-mudanças
-mudar
-mudou
 muita
 muitas
 muito
 muitos
-mulher
-mulheres
-multa
-multas
-mundial
-mundo
-municipais
-municipal
 município
 municípios
-muro
-museu
-musical
-má
-máquina
-máquinas
-máxima
-máximo
-mãe
-mães
-mão
-mãos
-média
-médica
-médio
-mérito
-mês
-mídia
-mínima
-mínimo
-mínimos
-móveis
-móvel
-música
-músicas
-músicos
 na
-nacionais
-nacional
-nada
-namorada
-namorado
+nao
+não
 naquela
+naquelas
 naquele
+naqueles
 nas
 nasceu
 nascido
-nascimento
-naturais
-natural
-naturalmente
-natureza
-nação
-nações
-necessidade
-necessidades
-necessita
-necessária
-necessárias
-necessário
-necessários
-nega
-negar
-negativa
-negativo
-negociar
-negociação
-negociações
-negou
-negra
-negro
-negros
-negócio
-negócios
 nela
 nele
 nem
@@ -2547,259 +721,83 @@ nessas
 nesse
 nesses
 nesta
+nestas
 neste
 nestes
-neto
-news
 ninguém
 nisso
 no
-nobre
-noite
-noites
 nome
 nomes
-norma
-normal
-normalmente
-normas
 norte
 nos
+nós
 nossa
 nossas
 nosso
 nossos
-nota
-notas
-notícia
-notícias
-nova
 novamente
-novas
 nove
 novembro
-novidade
-novidades
-novo
-novos
 num
 numa
-nunca
-não
-né
-níveis
-nível
-nós
-núcleo
-número
-números
+numas
+nuns
 o
-objetivo
-objetivos
-objeto
-objetos
-obra
-obras
-obrigado
-obrigados
-obrigação
-observa
-observar
-observou
 obter
 obteve
 ocasião
 ocorre
 ocorrem
+ocorrência
+ocorrências
 ocorrer
 ocorreram
 ocorreu
 ocorrido
-ocorrência
-ocorrências
-ocupa
-ocupar
-ocupação
-oeste
-oferece
-oferecem
-oferecer
-oferecido
-oferecidos
-oferta
-oficiais
-oficial
-oficialmente
-oficina
-oficinas
-ofício
+oitavo
 oito
-olha
-olhando
-olhar
-olho
-olhos
-oliveira
-olímpico
 onda
 onde
-ong
-online
 ontem
-operação
-operações
-opinião
-opiniões
-oportunidade
-oportunidades
-optar
-opção
-opções
+onze
 ora
-ordem
-organismo
-organizada
-organizado
-organizar
-organização
-organizações
-orientação
-origem
-original
-orçamento
 os
 ou
-ouro
 outra
 outras
 outro
 outros
 outubro
-ouvi
-ouvido
-ouvidos
-ouvir
-ouviu
-paciente
-pacientes
-paciência
-pacote
-padrão
-padrões
-paga
-pagam
-pagamento
-pagamentos
-pagando
-pagar
-pago
-pagos
-pagou
-pai
-paixão
-palanque
-palavra
-palavras
-palco
-palestra
-palestras
-palácio
-papai
-papel
-papéis
-par
 para
-parabéns
-parada
-parado
-paralisação
-paranaense
-parar
-parceiro
-parceiros
-parcela
-parceria
-parcerias
 parece
 parecem
 parecer
 parecia
-paredes
-parentes
-parlamentar
-parlamentares
-parlamento
-parou
-parque
 parte
 partes
-participa
-participam
-participantes
-participar
-participaram
-participação
-participou
-particular
-particulares
-partida
-partidas
-partido
-partidos
-partidária
-partir
 partiu
 passa
 passada
-passado
-passageiros
-passagem
-passagens
 passam
 passando
 passar
-passaram
 passará
+passaram
 passava
 passe
 passei
-passeio
-passo
-passo fundo
-passos
-passou
-pasta
-patamar
-patrimônio
-pau
-paula
-paulistas
-pauta
-pavimentação
-paz
-país
-países
+pé
+peça
+peças
 pede
-pedido
-pedidos
 pedindo
 pedir
-pediu
-pedra
-pedras
-pega
 pegar
 pegou
-peito
-peixe
-peixes
 pela
 pelas
-pele
-pelo
 pelos
-pena
-penal
 pensa
 pensam
 pensamento
@@ -2809,360 +807,68 @@ pensar
 pensei
 penso
 pensou
-pensão
-pequena
-pequenas
-pequeno
-pequenos
-perante
-percebe
-perceber
-percebeu
-percentual
-percurso
-perda
-perdas
-perde
-perdendo
-perder
-perderam
-perdeu
-perdido
-perfeito
-perfil
-pergunta
-perguntar
-perguntas
 perguntou
-perigo
-perigoso
 permanece
 permanecer
-permaneceu
-permanente
-permanência
-permite
-permitido
-permitir
-permitiu
-perna
-pernas
-personagem
-personagens
-personalidade
-perspectiva
-pertence
-perto
-período
-períodos
-pesado
-pesca
-peso
-pesquisa
-pesquisadores
-pesquisas
-pessoa
-pessoais
-pessoal
-pessoalmente
-pessoas
-peça
-peças
-piloto
-pilotos
-pintura
-pior
-piores
-piso
-pista
-placa
-placas
-planejamento
-planeta
-plano
-planos
-planta
-plantas
-plantio
-plantão
-plateia
-pleito
-plena
 pleno
-plenário
-plástico
-pneus
-pobre
-pobres
-pobreza
 pode
+pôde
 pode-se
 podem
 podemos
 podendo
 poder
-poderes
-poderia
-poderiam
 poderá
 poderão
+poderia
+poderiam
 podia
-poeta
+põe
+põem
 pois
-policiais
-policial
-politicamente
-polêmica
-políticas
-político
-políticos
-ponta
-ponte
-ponto
-pontos
-popular
-populares
-população
 por
-porque
-porta
-portal
-portanto
-portas
-porte
-porto
-português
 porém
-posicionamento
-positiva
-positivo
-positivos
-posição
-posições
+porque
+porquê
 possa
 possam
-posse
-possibilidade
-possibilidades
+possíveis
+possível
+possivelmente
 posso
 possuem
 possui
-possíveis
-possível
-posteriormente
-posto
-postos
-postura
-potencial
 pouca
 poucas
-pouco
 poucos
-povo
-povos
 pps
 pq
 pra
-praia
-praias
-prata
 praticamente
-praticar
-prato
-pratos
-prazer
-prazo
-prazos
-praça
-praças
-precisa
-precisam
-precisamos
-precisar
-precisava
-preciso
-precisou
-preconceito
-preencher
-prefeita
-prefeitos
-prefeituras
-prefere
-preferiu
-preferência
-prejudicar
-prejuízo
-prejuízos
-premiação
-preocupa
-preocupado
-preocupar
-preocupação
-prepara
-preparado
-preparados
-preparar
-preparação
-presa
-presente
-presentes
-presença
-preservar
-preservação
-presidencial
-presidente
-presidentes
-presidência
-preso
-presos
-pressão
-prestar
-prestação
-presídio
-preta
-pretende
-preto
-prevenção
-previdência
-prevista
-previstas
-previsto
-previstos
-previsão
-prevê
-preço
-preços
-primavera
 primeira
 primeiras
 primeiro
 primeiros
-principais
-principal
-principalmente
-princípio
-princípios
-prioridade
-prioridades
-prisão
-privada
-privado
-pro
-problema
-problemas
-procedimento
-procedimentos
-processo
-processos
-procura
-procurado
-procurador
-procuram
-procurando
-procurar
-procure
-procurou
-produtividade
-produto
-produtor
-produtores
-produtos
-produz
-produzido
-produzir
-produção
-professor
-professora
-profissionais
-profissional
-profissão
-profunda
-programa
-programas
-programação
-progresso
-proibido
-projeto
-projetos
-prol
-promessa
-promessas
-promete
-prometeu
-promotor
-promove
-promover
-promovido
-promoção
 pronta
 pronto
-propaganda
-proposta
-propostas
-propriedade
-propriedades
-proprietário
-proprietários
-propósito
-propõe
-proteger
-protesto
-proteção
-prova
-provar
-provas
-provavelmente
-providências
-provisória
-provocar
-provocou
-provável
-proximidades
-prudente
-prática
-práticas
-pré-candidato
-prédio
-prédios
-prévia
-prêmio
-prêmios
+propios
 própria
 próprias
+proprio
 próprio
 próprios
+provável
+provavelmente
 próxima
 próximas
+proximidades
 próximo
 próximos
-publicada
-publicado
-publicação
-publicidade
+puderam
 pudesse
-punição
-pura
-página
-páginas
-pátio
-pão
-pé
-pés
-pênalti
-pólo
-pública
-públicas
-público
-públicos
-quadra
-quadrados
-quadrilha
-quadro
-quadros
 quais
+quáis
 qual
-qualidade
-qualificação
 qualquer
 quando
 quantas
@@ -3172,279 +878,56 @@ quanto
 quantos
 quarta
 quarta-feira
-quarto
 quase
 quatro
 que
-quebra
-quebrar
-queda
-queira
+quê
 quem
-quente
-quer
-querem
-queremos
-querendo
-querer
-queria
-queriam
 querido
 quero
-questionado
-questão
-questões
-quilos
+quieto
 quilômetros
+quilos
 quinta
 quinta-feira
 quinto
-quis
-quiser
-rainha
-ramo
-ranking
-rapaz
-rapidamente
-razão
-razões
+quinze
 reais
-reajuste
 real
-realidade
-realiza
-realizada
-realizadas
-realizado
-realizados
-realizando
-realizar
-realização
 realizou
 realmente
-reação
-rebaixamento
-recado
-recebe
-recebem
-recebendo
-receber
-receberam
-receberá
-recebeu
-recebi
-recebido
-receita
-receitas
 recente
 recentemente
 recentes
-reclama
-reclamar
-reclamação
-reclamações
-reclamou
-reconhece
-reconhecer
-reconhecido
-reconhecimento
-recorde
-recorrer
-recuperar
-recuperação
-recurso
-recursos
-redação
-rede
-redes
 redonda
 redor
-reduzir
-redução
-reeleito
-reeleição
-refere
-referente
-referência
-reflete
-refletir
-reflexão
-reforma
-reformas
-reforçar
-reforço
-regime
-regionais
-regional
-registrada
-registrado
-registrados
-registrar
-registro
-registros
-registrou
-região
-regiões
-regra
-regras
-regular
-rei
-reino
-reivindicações
 relacionados
-relacionamento
-relacionamentos
-relata
-relator
-relatou
-relatório
-relação
-relações
-religioso
-remuneração
-remédio
-remédios
-renda
-rendimento
-renovação
-repasse
-repente
-repercussão
-repetir
-reportagem
-representa
-representam
-representante
-representantes
-representar
-representação
-repórter
-república
-reserva
-reservas
-resgate
-residência
-residências
-resistência
-resolução
-resolve
-resolver
-resolveu
 resolvido
-respectivamente
-respeitar
-respeito
-responde
-responder
-respondeu
-responsabilidade
-responsáveis
-responsável
 resposta
 respostas
 ressalta
 ressaltar
-ressaltou
-resta
-restante
-restaurante
-restaurantes
 resto
 resultado
 resultados
-retirada
-retirar
 retornar
-retorno
-reunir
-reuniu
-reunião
-reuniões
-revela
-revelou
-rever
-reverter
-revista
-revistas
-revisão
-revolução
-reúne
-rica
-rico
-ricos
-rio
-rio de janeiro
-rios
-riqueza
-risco
-riscos
-ritmo
-rival
-rock
-rodada
-rodadas
-rodovia
-rodovias
-rodoviária
-romance
-rosto
-roteiro
-rotina
-roubo
-roupa
-roupas
-rua
-ruas
-rubro-negro
-ruim
-rumo
-rurais
-rural
-rádio
-rápida
-rápido
+sábado
+sábados
 sabe
-sabedoria
 sabem
 sabemos
 sabendo
 saber
 sabia
-saco
 saem
 sai
 saia
 saiba
 saindo
 sair
-saiu
-sala
-salarial
-salas
-saldo
-salto
-salvar
-salário
-salários
-salão
-saneamento
-sangue
-santista
-satisfação
-satisfeito
-saudade
-saudável
-saída
 saíram
+são
 se
-secretaria
-secretarias
-secretário
-secretários
-sede
-segmento
-segmentos
-segredo
 segue
 seguem
 seguida
@@ -3458,230 +941,131 @@ segunda
 segunda-feira
 segundo
 segundos
-segura
-segurança
-segurar
 seguro
 sei
 seis
 seja
 sejam
 sejamos
-seleção
 sem
 semana
 semanas
 semelhante
 semelhantes
-semestre
-seminário
 sempre
-senado
-senador
-senadora
-senadores
+senão
 sendo
 senhor
 senhora
 senhores
-sensação
 senso
 sente
-sentença
 sentido
 sentimento
 sentimentos
 sentindo
 sentir
 sentiu
-senão
 sequer
-sequência
 ser
+será
+serão
 serei
 serem
 seremos
 seres
 seria
 seriam
-serve
-servidor
-servidores
-servir
-serviu
-serviço
-serviços
-será
-serão
 seríamos
+série
+sério
+serve
 sessão
 sessões
 sete
 setembro
+sétima
+sétimo
 setor
 setores
 seu
 seus
-sexo
 sexta
 sexta-feira
-sexual
-shopping
-show
-shows
+sexto
 si
 sido
-sigilo
-sigla
-significa
-significado
-silêncio
 sim
 simples
 simplesmente
-sinais
-sinal
 sinto
-sintomas
-sistema
-sistemas
 site
 sites
-situação
-situações
+sítio
+só
 sob
 sobe
 sobra
 sobre
 sobretudo
-sobrinho
-sociais
-social
-socorro
-sofre
-sofrem
-sofrendo
-sofrer
-sofreu
-sofrimento
-sol
-soldados
-solenidade
-solicitação
-solicitou
-solidariedade
+sois
 solo
-solução
-soluções
-som
-soma
-sombra
 somente
 somos
-sonho
-sonhos
-sono
-sorriso
-sorte
-sorteio
 sou
 soube
 sousa
 sozinha
-sozinho
 sua
 suas
 subir
 subiu
-substituir
-substituição
-sucesso
-sucessão
 sudeste
 suficiente
 suficientes
 sugere
-sugestão
-sugestões
 sujeito
 sul
-super
-superar
-superintendente
-superior
-superiores
-supermercado
-superou
-suplente
-suporte
-suposto
-supremo
-surge
-surgiu
-surpresa
-suspeita
-suspeito
-suspeitos
-suspensão
-sábado
-sábados
-são
-século
-série
-sério
-sítio
-só
-sócios
-tabela
+tá
 tais
 tal
-talento
 talvez
-tamanho
+tambem
 também
 tanta
 tantas
 tanto
 tantos
-taques
-tarde
-tarefa
-tarifa
-taxa
-taxas
-taça
+tão
 te
-teatro
-tecnologia
-tecnologias
-tela
-telefone
-telefones
-televisão
 tem
-tema
-temas
+tém
+têm
 temos
-temperatura
-tempo
 temporada
-tempos
 tende
-tendo
 tendência
+tendes
+tendo
 tenha
 tenham
 tenhamos
 tenho
+tens
 tenta
 tentam
 tentando
 tentar
 tentaram
 tentativa
+tente
+tentei
 tentou
-teoria
 ter
+terá
+terão
+terça
+terça-feira
 terceira
 terceiro
 terei
@@ -3689,92 +1073,52 @@ terem
 teremos
 teria
 teriam
-termina
-terminal
-terminar
-terminou
-termo
-termos
-terra
-terras
-terreno
-terrenos
-território
-terá
-terão
-terça
-terça-feira
 teríamos
-tese
-tesouro
-teste
-testemunhas
-testes
-teto
 teu
 teus
 teve
-texto
-textos
 the
 ti
 tido
-time
 times
 tinha
 tinham
-tio
+tínhamos
 tipo
 tipos
-tira
-tirar
-tiro
-tiros
 tirou
-titular
-titulares
 tive
 tivemos
 tiver
 tivera
 tiveram
+tivéramos
 tiverem
 tivermos
 tivesse
 tivessem
-tivéramos
 tivéssemos
+tiveste
+tivestes
 tocar
-tocou
 toda
 todas
 todo
 todos
-tom
 toma
-tomada
-tomadas
 tomando
 tomar
 tomou
 toneladas
 toque
-torcedor
-torcedores
-torcida
 torna
 tornando
 tornar
-torneio
-torno
 tornou
 tornou-se
-torres
 total
 totalmente
 trabalha
-trabalhador
-trabalhadores
 trabalham
 trabalhando
 trabalhar
@@ -3782,281 +1126,101 @@ trabalhava
 trabalho
 trabalhos
 trabalhou
-tradicionais
-tradicional
-tradição
-traficantes
-tragédia
-trajetória
-tranquilidade
-transferência
-transformar
-transformação
-transformou
-transição
-transmissão
-transparência
-transporte
-transportes
-trata
+trás
 trata-se
-tratado
-tratamento
-tratar
-trave
-travessão
 traz
 trazendo
 trazer
-trecho
-trechos
-treinador
-treinamento
-treino
-trem
-tribuna
-tribunal
-tributária
+três
+treze
 trimestre
 trinta
 trio
-triste
-tristeza
-troca
-trocar
-troféu
 trouxe
-tráfego
-trás
-três
 tu
 tua
 tuas
 tudo
-turismo
-turistas
-turma
-tv
-twitter
-tá
-tão
-técnica
-técnicas
-técnico
-técnicos
-tém
-término
-tênis
-tínhamos
-título
-títulos
+última
+últimas
+último
+últimos
 um
 uma
 umas
-unidade
+única
 unidades
-unidos
-unir
-universidade
-universidades
-universitário
-universo
-união
 uns
-urbana
-urbano
-urgência
-urnas
+usa
 usada
 usado
 usados
 usam
 usando
 usar
-usina
-usinas
 uso
 usou
 usuário
 usuários
+útil
+utilização
 utilizada
 utilizado
 utilizados
 utilizar
-utilização
-vacinação
-vaga
-vagas
+vá
 vai
+vais
 vale
-valer
-valor
-valores
-valorizar
-valorização
 vamos
-vantagem
-vantagens
-vara
-variação
+vão
+várias
+vários
 vc
 vcs
+vê
 veio
-vejo
 velha
 velho
 velhos
-velocidade
 vem
+vêm
 vemos
-vence
-vencedor
-vencer
-venceu
-venda
-vendas
-vender
-vendidos
 vendo
 venha
-vento
+vens
 ver
 vera
-verba
-verbas
-verdade
-verdadeira
-verdadeiro
-verdadeiros
-verde
-vereador
-vereadora
-vereadores
-vergonha
-verificar
-vermelha
-vermelho
-versão
-verão
 vez
 vezes
-veículo
-veículos
 vi
 via
-viagem
-viagens
-viajar
 vias
-vice
-vice-governador
-vice-prefeito
-vice-presidente
-vida
-vidas
 vieram
-vigor
-vila
 vinda
 vindo
 vinha
-vinho
 vinte
-violência
 vir
 vira
 virada
 viram
 virar
 virou
-virtude
-visa
-visando
-visita
-visitantes
-visitar
-visitas
-vista
-visto
-visual
-visão
-vitória
-vitórias
-viu
-viva
-vive
-vivem
-vivemos
-vivendo
-viver
-viveu
-vivo
-vizinho
-vizinhos
 você
 vocês
-volante
 volta
 voltada
 voltado
-voltam
-voltando
 voltar
 voltaram
 voltou
-volume
-voluntários
 vontade
-voos
 vos
-votado
-votar
-votação
-voto
-votos
-votou
+vós
+vossa
+vossas
+vosso
+vossos
 vou
-voz
-vá
-várias
-vários
-várzea
-vão
-véspera
-vê
-vídeo
-vídeos
-vítima
-vítimas
-vôo
-zagueiro
-zero
-zona
-à
-às
-água
-águas
-árbitro
-área
-áreas
-árvore
-árvores
-época
-éramos
-êxito
-índia
-índice
-índices
-óleo
-órgão
-órgãos
-ótima
-ótimo
-ônibus
-última
-últimas
-último
-últimos
-única
-único
-útil
+zero
\ No newline at end of file
diff --git a/apps/common/src/python/mediawords/languages/pt/pt_stop_words_old.txt b/apps/common/src/python/mediawords/languages/pt/pt_stop_words_old.txt
new file mode 100644
index 0000000000..d49861eea5
--- /dev/null
+++ b/apps/common/src/python/mediawords/languages/pt/pt_stop_words_old.txt
@@ -0,0 +1,4062 @@
+#
+# This is a "long" stop word list for the Portuguese language.
+#
+# Sources:
+#     http://search.cpan.org/~creamyg/Lingua-StopWords-0.09/
+#
+
+a
+a meta
+abaixo
+abastecimento
+aberta
+abertas
+aberto
+abertos
+abertura
+abraço
+abre
+abreu
+abril
+abrir
+abriu
+absoluta
+absolutamente
+absurdo
+abuso
+acaba
+acabam
+acabar
+acabaram
+acabou
+academia
+acaso
+aceita
+aceitar
+aceitou
+acertar
+acertou
+acesso
+acha
+acham
+achar
+achei
+acho
+achou
+acidente
+acidentes
+acima
+acompanha
+acompanhada
+acompanhado
+acompanhamento
+acompanhar
+acompanhou
+acontece
+acontecem
+acontecendo
+acontecer
+acontecerá
+aconteceu
+acontecido
+acontecimentos
+acordo
+acredita
+acreditam
+acreditar
+acredito
+acrescenta
+acrescentou
+acumulado
+acusado
+acusados
+acusação
+acusações
+adequada
+adequado
+adesão
+adianta
+adiante
+adiantou
+administrador
+administrar
+administrativa
+administrativo
+administração
+admite
+admitiu
+adolescente
+adolescentes
+adotar
+adoção
+adquirir
+adultos
+adversário
+adversários
+advogada
+advogado
+advogados
+aeroporto
+afastado
+afastar
+afinal
+afirma
+afirmam
+afirmando
+afirmar
+afirmou
+agenda
+agente
+agentes
+agir
+agora
+agosto
+agradecer
+agressão
+agricultores
+agricultura
+agrícola
+aguarda
+aguardar
+agência
+agências
+ah
+ainda
+ajuda
+ajudam
+ajudar
+ajudou
+ala
+alcançar
+alega
+alegou
+alegre
+alegria
+alemão
+alerta
+algo
+algum
+alguma
+algumas
+alguns
+alguém
+ali
+aliado
+aliados
+aliança
+alimentar
+alimentação
+alimento
+alimentos
+aliás
+alma
+almoço
+alta
+altas
+alteração
+alterações
+alternativa
+alternativas
+alto
+altos
+altura
+aluguel
+aluno
+alunos
+alves
+alvinegro
+alvo
+além
+ama
+amanhã
+amarelo
+ambas
+ambientais
+ambiental
+ambiente
+ambos
+ameaça
+ameaças
+americano
+americanos
+amiga
+amigo
+amigos
+amizade
+amor
+ampla
+ampliar
+ampliação
+amplo
+analisa
+analisar
+analistas
+anda
+andamento
+andar
+animais
+animal
+animação
+aniversário
+ano
+anos
+ante
+anterior
+anteriores
+anteriormente
+antes
+antiga
+antigo
+antigos
+anual
+anuncia
+anunciado
+anunciar
+anunciou
+análise
+anúncio
+ao
+aos
+aparece
+aparecem
+aparecer
+apareceu
+aparecida
+aparelho
+aparelhos
+apartamento
+apelo
+apenas
+apesar
+aplicada
+aplicado
+aplicar
+aplicação
+apoia
+apoiar
+apoio
+aponta
+apontam
+apontou
+aposentado
+aposentadoria
+aposentados
+aposta
+apreensão
+aprender
+aprendizado
+apresenta
+apresentada
+apresentadas
+apresentado
+apresentados
+apresentam
+apresentar
+apresentaram
+apresentação
+apresentações
+apresentou
+aprovada
+aprovado
+aprovados
+aprovar
+aprovação
+aproveitar
+aproveitou
+aproximadamente
+apuração
+após
+aquela
+aquelas
+aquele
+aqueles
+aqui
+aquilo
+aquisição
+ar
+areia
+arena
+argumento
+argumentos
+arma
+armado
+armas
+arrecadação
+arroz
+arruda
+art
+arte
+artes
+artigo
+artigos
+artilheiro
+artista
+artistas
+as
+asfalto
+aspecto
+aspectos
+assaltantes
+assalto
+assassinato
+assembleia
+assessor
+assessores
+assessoria
+assim
+assinado
+assinar
+assinatura
+assinou
+assistente
+assistir
+assistência
+associados
+associação
+associações
+assume
+assumir
+assumiu
+assunto
+assuntos
+at
+atacante
+atacar
+ataque
+ataques
+atende
+atendendo
+atender
+atendidas
+atendido
+atendidos
+atendimento
+atendimentos
+atento
+atenção
+atinge
+atingido
+atingir
+atingiu
+atitude
+atitudes
+ativa
+atividade
+atividades
+atleta
+atletas
+ato
+ator
+atores
+atos
+atrair
+atraso
+através
+atração
+atrações
+atriz
+atrás
+atua
+atuais
+atual
+atualizado
+atualmente
+atuam
+atuando
+atuar
+atuação
+atuou
+até
+auditório
+audiência
+aula
+aulas
+aumenta
+aumentando
+aumentar
+aumento
+aumentou
+ausência
+automóveis
+automóvel
+autonomia
+autor
+autores
+autoria
+autoridade
+autoridades
+autorização
+autos
+auxiliar
+auxílio
+avalia
+avaliar
+avaliação
+avaliou
+avançar
+avanço
+avanços
+avançou
+avenida
+avisa
+avião
+avó
+azul
+ação
+ações
+aérea
+aí
+baiano
+baile
+bairro
+bairros
+baixa
+baixo
+baixos
+balanço
+bancada
+banco
+bancos
+banda
+bandas
+bandeira
+bandido
+bandidos
+banheiro
+banho
+bar
+barato
+barco
+bares
+barra
+barreiras
+barros
+barulho
+base
+baseado
+bases
+basta
+bastante
+bastidores
+batalha
+batalhão
+bate
+bater
+bateria
+bateu
+beber
+bebida
+bebidas
+bebê
+beira
+bela
+beleza
+belo
+bem
+beneficiar
+benefício
+benefícios
+bens
+bernardo
+biblioteca
+bicicleta
+bilhão
+bilhões
+bloco
+blocos
+blog
+boa
+boas
+boca
+bola
+boletim
+bolsa
+bolsas
+bolso
+bom
+bomba
+bombeiros
+bonita
+bonito
+bons
+branca
+branco
+brancos
+brasileiras
+brasileiro
+brasileiros
+braço
+braços
+breve
+briga
+brilhante
+brincadeira
+brincar
+brinquedos
+bruto
+buraco
+buracos
+busca
+buscam
+buscando
+buscar
+básica
+básicas
+básico
+cabe
+cabeceou
+cabelo
+cabelos
+cabeça
+cabo
+cachorro
+cada
+cadastro
+cadeia
+cadeira
+cadeiras
+cadê
+cai
+cair
+caiu
+caixa
+caixas
+calendário
+calma
+calor
+calçada
+cama
+caminhada
+caminho
+caminhos
+caminhão
+caminhões
+camisa
+campanha
+campanhas
+campeonato
+campeão
+campeões
+campo
+campos
+campus
+cana
+canal
+candidata
+candidato
+candidatos
+candidatura
+candidaturas
+canto
+cantor
+cantora
+caos
+capa
+capacidade
+capacitação
+capaz
+capazes
+capitais
+capital
+capitão
+capixaba
+capítulo
+cara
+característica
+características
+caras
+carga
+cargo
+cargos
+carinho
+carioca
+carne
+caro
+carreira
+carro
+carros
+carta
+cartas
+carteira
+cartão
+cartório
+cartões
+caruaru
+caráter
+casa
+casado
+casal
+casamento
+casar
+casas
+caso
+casos
+cassado
+cassação
+castelo
+catarinense
+categoria
+categorias
+causa
+causar
+causas
+causou
+caíram
+cd
+cedo
+celular
+cem
+cemitério
+cena
+cenas
+centenas
+cento
+centrais
+central
+centro
+centros
+cenário
+cerca
+cerimônia
+certa
+certamente
+certas
+certeza
+certo
+certos
+cerveja
+chama
+chamada
+chamado
+chamados
+chamar
+chamou
+chance
+chances
+chapa
+chave
+chefe
+chega
+chegada
+chegam
+chegamos
+chegando
+chegar
+chegaram
+chegou
+cheguei
+cheia
+cheio
+cheiro
+cheque
+choque
+chute
+chutou
+chuva
+chuvas
+chão
+ciclo
+cidadania
+cidade
+cidades
+cidadão
+cidadãos
+cientistas
+cima
+cinco
+cinema
+circo
+circuito
+circulação
+cirurgia
+cita
+citado
+citar
+citou
+civil
+civis
+ciência
+ciências
+classe
+classes
+classificação
+cliente
+clientes
+clima
+clique
+clube
+clubes
+clássico
+clínica
+cobertura
+cobra
+cobrança
+cobrar
+cobrou
+cofres
+coisa
+coisas
+colaboradores
+colega
+colegas
+coleta
+coletiva
+coletivo
+coleção
+coligação
+coloca
+colocada
+colocado
+colocados
+colocando
+colocar
+colocação
+colocou
+coloque
+colorado
+coluna
+colunista
+colégio
+com
+comandante
+comando
+combate
+combater
+combustível
+comecei
+comemora
+comemorar
+comemoração
+comemorou
+comenta
+comentar
+comentou
+comentário
+comentários
+comer
+comerciais
+comercial
+comercialização
+comerciante
+comerciantes
+começa
+começam
+começando
+começar
+começaram
+começo
+começou
+comida
+comigo
+comissão
+comitê
+como
+companheiro
+companheiros
+companhia
+companhias
+comparação
+competente
+competição
+competições
+competência
+complementar
+completa
+completamente
+completar
+completo
+completou
+complexo
+complicado
+compor
+comportamento
+composição
+composta
+composto
+compra
+comprar
+compras
+compreensão
+compromisso
+compromissos
+comprou
+computador
+computadores
+comum
+comunicado
+comunicação
+comunidade
+comunidades
+comuns
+comércio
+conceito
+conceitos
+concentração
+concessão
+conclui
+concluir
+concluiu
+conclusão
+concorda
+concordo
+concorrentes
+concorrer
+concorrência
+concreto
+concurso
+concursos
+condenado
+condenação
+condição
+condições
+conduta
+confederação
+conferir
+conferência
+confiança
+confira
+confirma
+confirmado
+confirmar
+confirmou
+conflito
+conflitos
+conforme
+conforto
+confronto
+confusão
+conhece
+conhecem
+conhecer
+conheceu
+conhecida
+conhecido
+conhecidos
+conhecimento
+conhecimentos
+conheço
+conjunto
+conquista
+conquistar
+conquistas
+conquistou
+consciente
+conscientização
+consciência
+consegue
+conseguem
+consegui
+conseguia
+conseguimos
+conseguir
+conseguiram
+conseguiu
+conselheiro
+conselho
+conselhos
+consenso
+conservação
+considera
+considerada
+considerado
+considerados
+considerando
+considerar
+consideração
+considerou
+consigo
+consta
+constante
+constantes
+constitucional
+construir
+construção
+construída
+construído
+consulta
+consultas
+consultoria
+consumidor
+consumidores
+consumo
+consórcio
+conta
+contam
+contando
+contar
+contará
+contas
+contato
+contatos
+conter
+contexto
+conteúdo
+continua
+continuam
+continuar
+continuará
+continue
+continuidade
+continuou
+contou
+contra
+contrapartida
+contratado
+contratados
+contratar
+contratação
+contratações
+contrato
+contratos
+contribuinte
+contribuir
+contribuição
+controlar
+controle
+contrário
+contudo
+convencer
+convenção
+conversa
+conversar
+conversas
+convidado
+convidados
+convite
+conviver
+convivência
+convênio
+cooperativa
+coordenador
+coordenadora
+coordenação
+cor
+coragem
+coração
+cores
+coronel
+corpo
+corpos
+corre
+correndo
+corrente
+correr
+correta
+correto
+correção
+corrida
+cortar
+corte
+cortes
+costas
+costuma
+costumam
+cotidiano
+cozinha
+credibilidade
+creio
+cresce
+crescendo
+crescente
+crescer
+cresceu
+crescimento
+cria
+criada
+criado
+criando
+crianças
+criar
+criatividade
+criação
+crime
+crimes
+criminal
+criminalidade
+criminosos
+criou
+crise
+criticar
+criticou
+critério
+critérios
+cruzamento
+cruzes
+cruzou
+crédito
+créditos
+crítica
+críticas
+crítico
+cuidado
+cuidados
+cuidar
+cuja
+cujo
+culpa
+cultura
+culturais
+cultural
+cumprimento
+cumprir
+cumpriu
+cunha
+currículo
+curso
+cursos
+curto
+custa
+custo
+custos
+cá
+cães
+cérebro
+céu
+código
+cúpula
+da
+dada
+dado
+dados
+dando
+danos
+dantas
+dança
+daquela
+daquele
+daqueles
+daqui
+dar
+daria
+dará
+das
+data
+dava
+daí
+de
+de deus
+debaixo
+debate
+debates
+decide
+decidir
+decidiu
+decisão
+decisões
+declaração
+declarações
+declarou
+decoração
+decreto
+dedicação
+defende
+defender
+defendeu
+defensor
+defesa
+deficiência
+define
+definida
+definido
+definir
+definitivamente
+definiu
+definição
+deixa
+deixado
+deixam
+deixando
+deixar
+deixaram
+deixe
+deixou
+dela
+delas
+dele
+delegacia
+delegado
+deles
+demais
+demanda
+demandas
+demissão
+democrático
+demonstra
+demonstrou
+demora
+demorou
+dentre
+dentro
+denunciar
+denúncia
+denúncias
+departamento
+depende
+dependendo
+depender
+depoimento
+depoimentos
+depois
+deputada
+deputado
+deputados
+der
+deram
+derrota
+derrotado
+desafio
+desafios
+descoberta
+descobre
+descobrir
+descobriu
+desconto
+desculpas
+desde
+deseja
+desejar
+desejo
+desembargador
+desempenho
+desemprego
+desenvolver
+desenvolvido
+desenvolvimento
+desespero
+desfile
+despesas
+dessa
+dessas
+desse
+desses
+desta
+destaca
+destacar
+destacou
+destaque
+destaques
+destas
+deste
+destes
+destinado
+destinados
+destino
+desvio
+desviou
+detalhe
+detalhes
+determina
+determinado
+determinação
+determinou
+deu
+deve
+deve-se
+devem
+devemos
+dever
+deveria
+deveriam
+deverá
+deverão
+devia
+devido
+dez
+dezembro
+dezenas
+dia
+dia-a-dia
+diagnóstico
+diante
+diariamente
+dias
+dica
+dicas
+diferente
+diferentes
+diferença
+diferenças
+dificilmente
+dificuldade
+dificuldades
+difíceis
+difícil
+diga
+digital
+dignidade
+digo
+diminuir
+diminuição
+dinheiro
+diploma
+direito
+direta
+diretamente
+direto
+diretor
+diretora
+diretores
+diretoria
+diretório
+direção
+diria
+dirigente
+dirigentes
+dirigir
+disciplina
+disco
+discurso
+discussão
+discussões
+discutir
+disponíveis
+disponível
+disposição
+disposto
+disputa
+disputar
+dispõe
+disse
+disseram
+disso
+distante
+distribuição
+distribuídos
+distrito
+dito
+diversas
+diversos
+divisão
+divulgada
+divulgado
+divulgados
+divulgar
+divulgação
+divulgou
+diz
+dizem
+dizendo
+dizer
+dizia
+diálogo
+diária
+diárias
+diário
+do
+doação
+doações
+doce
+documentação
+documento
+documentos
+doente
+doença
+doenças
+dois
+domingo
+domínio
+dona
+dono
+donos
+dor
+dores
+dormir
+dos
+dose
+doutor
+duas
+duelo
+dupla
+dura
+durante
+duração
+duro
+durou
+dutra
+dvd
+dão
+década
+décadas
+déficit
+dívida
+dívidas
+dólar
+dólares
+dúvida
+dúvidas
+e
+e-mail
+economista
+econômica
+econômicas
+econômico
+econômicos
+edital
+edição
+edições
+educacional
+efeito
+efeitos
+efetivamente
+efetivo
+eficiente
+eficiência
+eis
+ela
+elaboração
+elas
+ele
+eleger
+elegeu
+eleita
+eleito
+eleitor
+eleitorado
+eleitorais
+eleitores
+eleitos
+eleição
+eleições
+elementos
+elenco
+eles
+eletrônica
+eletrônico
+elevado
+elevação
+elite
+elogios
+elétrica
+em
+embora
+emenda
+emendas
+emergência
+emissora
+emissoras
+emissão
+emocional
+emoção
+emoções
+empate
+empatou
+empenho
+empreendimento
+empreendimentos
+empregados
+emprego
+empregos
+empresa
+empresarial
+empresas
+empresário
+empresários
+empréstimo
+empréstimos
+encaminhado
+encaminhados
+encarar
+encerramento
+encerrou
+enchentes
+encontra
+encontrada
+encontrado
+encontrados
+encontram
+encontrar
+encontraram
+encontro
+encontros
+encontrou
+endereço
+energia
+enfatizou
+enfim
+enfrenta
+enfrentar
+engenharia
+engenheiro
+enorme
+enquanto
+ensinar
+ensino
+entanto
+entende
+entender
+entendeu
+entendimento
+entendo
+entidade
+entidades
+entra
+entrada
+entram
+entrar
+entraram
+entre
+entrega
+entregar
+entregou
+entregue
+entregues
+entretanto
+entrevista
+entrevistados
+entrevistas
+entrou
+então
+enviado
+enviar
+enviou
+envolve
+envolvendo
+envolvido
+envolvidos
+envolvimento
+episódio
+equilíbrio
+equipamento
+equipamentos
+equipe
+equipes
+equivalente
+era
+eram
+errado
+erro
+erros
+escala
+escanteio
+esclarecer
+escola
+escolar
+escolas
+escolha
+escolher
+escolheu
+escolhido
+esconder
+escrever
+escreveu
+escrita
+escrito
+escritor
+escritório
+esforço
+esforços
+espanhol
+espaço
+espaços
+especiais
+especial
+especialista
+especialistas
+especializada
+especialmente
+específica
+específico
+espera
+esperado
+esperamos
+esperando
+esperança
+esperar
+esperava
+espero
+espetáculo
+espiritual
+esportiva
+esportivo
+esposa
+espécie
+espécies
+espírito
+esquecer
+esquema
+esquerdo
+esquina
+essa
+essas
+esse
+essencial
+esses
+esta
+estabelece
+estabelecer
+estabelecimento
+estabelecimentos
+estabilidade
+estacionamento
+estado
+estados
+estaduais
+estadual
+estamos
+estar
+estaria
+estariam
+estará
+estarão
+estas
+estatal
+estatuto
+estatística
+estatísticas
+estava
+estavam
+estação
+este
+esteja
+estejam
+estejamos
+estes
+esteve
+estilo
+estimativa
+estimular
+estive
+estivemos
+estiver
+estivera
+estiveram
+estiverem
+estivermos
+estivesse
+estivessem
+estivéramos
+estivéssemos
+estou
+estrada
+estradas
+estrangeiros
+estranho
+estratégia
+estratégias
+estreia
+estrela
+estrelas
+estrutura
+estudar
+estudo
+estudos
+está
+estádio
+estágio
+estávamos
+estão
+etapa
+etapas
+etc
+etc.
+eu
+evento
+eventos
+eventual
+evidente
+evitar
+evolução
+ex-deputado
+ex-governador
+ex-prefeito
+ex-presidente
+exame
+exames
+exatamente
+excelente
+excelência
+excesso
+exceção
+exclusivamente
+executiva
+executivo
+execução
+exemplo
+exemplos
+exercer
+exercício
+exercícios
+exige
+exigir
+exigência
+exigências
+existe
+existem
+existentes
+existia
+existir
+existência
+expandir
+expansão
+expectativa
+expectativas
+experiência
+experiências
+explica
+explicar
+explicação
+explicações
+explicou
+exploração
+exportações
+exposição
+expressão
+expulso
+extensão
+exterior
+externa
+extra
+extremamente
+exército
+face
+facilidade
+facilitar
+facilmente
+faculdade
+faixa
+faixas
+fala
+falam
+falando
+falar
+falei
+falha
+falhas
+falou
+falta
+faltam
+faltando
+faltou
+fama
+familiar
+familiares
+famoso
+famílias
+faria
+farmácia
+farroupilha
+fará
+farão
+fase
+fato
+fator
+fatores
+fatos
+faturamento
+favor
+favorável
+faz
+fazem
+fazemos
+fazenda
+fazendo
+fazer
+fazia
+faça
+façam
+faço
+febre
+fechada
+fechado
+fechados
+fechamento
+fechar
+fechou
+federais
+federal
+federação
+feira
+feita
+feitas
+feito
+feitos
+felicidade
+feliz
+felizes
+feminina
+feminino
+fenômeno
+feriado
+feridos
+ferramenta
+ferramentas
+ferro
+festa
+festas
+festival
+fevereiro
+fez
+fica
+ficam
+ficamos
+ficando
+ficar
+ficaram
+ficaria
+ficará
+ficarão
+ficava
+ficha
+fico
+ficou
+fiel
+figura
+fila
+filha
+filhas
+filho
+filhos
+filme
+filmes
+filosofia
+fim
+finais
+final
+finalidade
+finalizou
+finalmente
+financeira
+financeiras
+financeiro
+financeiros
+financiamento
+finanças
+fins
+fique
+fiquei
+firme
+fiscais
+fiscal
+fiscalizar
+fiscalização
+fiz
+fizemos
+fizeram
+fiéis
+flagrante
+flor
+flores
+floresta
+fluxo
+foco
+fogo
+foi
+folha
+fomos
+fonte
+fontes
+for
+fora
+foram
+forem
+forma
+formada
+formado
+formar
+formas
+formato
+formação
+formos
+forró
+fortalecer
+forte
+força
+forças
+fosse
+fossem
+foto
+fotos
+fraco
+francês
+frase
+fraude
+freitas
+frente
+frio
+frisou
+fronteira
+frota
+frutas
+fruto
+frutos
+fuga
+fugir
+fugiu
+fui
+funciona
+funcionamento
+funcionando
+funcionar
+funcionário
+funcionários
+fundamentais
+fundamental
+fundação
+fundo
+fundos
+função
+funções
+furto
+futebol
+futsal
+futuro
+futuros
+fábrica
+fácil
+fãs
+fé
+férias
+física
+físicas
+físico
+fórmula
+fórum
+fôramos
+fôssemos
+gabinete
+gado
+galeria
+ganha
+ganham
+ganhando
+ganhar
+ganho
+ganhos
+ganhou
+garante
+garantia
+garantir
+garantiu
+garota
+garoto
+gastar
+gasto
+gastos
+gaúcha
+gaúchos
+general
+gente
+geografia
+gera
+gerais
+geral
+geralmente
+gerando
+gerar
+geração
+gerente
+gerou
+gestor
+gestores
+gestão
+ginásio
+global
+gol
+goleiro
+golpe
+gols
+gosta
+gostam
+gostaria
+gostei
+gosto
+gostou
+governador
+governadora
+governadores
+governantes
+governar
+governo
+governos
+gramado
+grande
+grandes
+gratuita
+gratuito
+grau
+grave
+graves
+graças
+grossa
+grosso
+grupo
+grupos
+grãos
+guarda
+guia
+gás
+gênero
+habitantes
+habitação
+haja
+hajam
+hajamos
+harmonia
+havemos
+haver
+haveria
+haverá
+havia
+haviam
+hectares
+hei
+helena
+hipótese
+história
+histórias
+histórica
+histórico
+hoje
+homem
+homenagem
+homens
+homicídio
+homicídios
+honra
+hora
+horas
+horizonte
+horário
+horários
+hospital
+hotel
+hotéis
+houve
+houvemos
+houver
+houvera
+houveram
+houverei
+houverem
+houveremos
+houveria
+houveriam
+houvermos
+houverá
+houverão
+houveríamos
+houvesse
+houvessem
+houvéramos
+houvéssemos
+hugo
+humana
+humanidade
+humano
+humanos
+humor
+há
+hábito
+hão
+ia
+ibope
+ida
+idade
+ideal
+identidade
+identificado
+identificar
+identificação
+idosos
+idéia
+idéias
+iguais
+igual
+igualdade
+ilegal
+ilha
+iluminação
+imagem
+imagens
+imaginar
+imediata
+imediatamente
+imediato
+impacto
+impede
+impedir
+implantar
+implantação
+impor
+importa
+importante
+importantes
+impossível
+imposto
+impostos
+imprensa
+impressão
+imóveis
+imóvel
+inauguração
+incentivar
+incentivo
+inclui
+incluindo
+inclusive
+inclusão
+incrível
+incêndio
+indenização
+independente
+independentemente
+independência
+indica
+indicado
+indicar
+indicação
+individuais
+individual
+indivíduo
+indivíduos
+indo
+industrial
+indícios
+indígena
+indígenas
+indústria
+indústrias
+infantil
+infelizmente
+inferior
+influência
+informa
+informado
+informar
+informação
+informações
+informou
+informática
+infra-estrutura
+infraestrutura
+inglês
+ingresso
+ingressos
+inicia
+iniciada
+inicial
+inicialmente
+iniciar
+iniciativa
+iniciativas
+iniciou
+inquérito
+inscritos
+inscrição
+inscrições
+instalada
+instalar
+instalação
+instalações
+institucional
+instituição
+instituições
+instituto
+instrumento
+instrumentos
+integra
+integral
+integrante
+integrantes
+integrar
+integração
+inteira
+inteiro
+inteligente
+inteligência
+intensa
+intenso
+intenção
+intenções
+inter
+interessa
+interessados
+interessante
+interesse
+interesses
+interior
+interna
+internacionais
+internacional
+internado
+internet
+interno
+interpretação
+intervalo
+intervenção
+intuito
+invadiu
+inverno
+investidores
+investigar
+investigação
+investigações
+investimento
+investimentos
+investir
+invés
+início
+inúmeras
+inúmeros
+ir
+iria
+irmã
+irmão
+irmãos
+irregular
+irregularidades
+irá
+irão
+isso
+isto
+italiano
+item
+itens
+jamais
+janeiro
+janela
+jantar
+jardim
+jc
+jeito
+joga
+jogada
+jogadas
+jogador
+jogadores
+jogando
+jogar
+jogo
+jogos
+jogou
+jornada
+jornais
+jornal
+jornalismo
+jornalistas
+judicial
+judiciário
+juiz
+julgamento
+julgar
+julho
+junho
+juntamente
+junto
+juntos
+juros
+jurídica
+jurídico
+justa
+justamente
+justifica
+justificar
+justificativa
+justiça
+justo
+juventude
+juíza
+juízes
+juízo
+já
+km
+laboratório
+lado
+lados
+ladrões
+lago
+lamentável
+lance
+lança
+lançado
+lançamento
+lançar
+lançou
+lar
+larga
+lateral
+latina
+lazer
+leal
+legais
+legal
+legenda
+legislativa
+legislativo
+legislação
+lei
+leia
+leilão
+leis
+leite
+leitor
+leitores
+leitura
+lembra
+lembrando
+lembrar
+lembro
+lembrou
+ler
+leste
+lesão
+letra
+letras
+leva
+levada
+levado
+levados
+levam
+levando
+levantamento
+levantar
+levar
+levaram
+leve
+levou
+lhe
+lhes
+li
+liberado
+liberação
+liberdade
+licença
+licitação
+lidar
+liderança
+lideranças
+liga
+ligada
+ligadas
+ligado
+ligados
+ligar
+ligação
+ligações
+liminar
+limite
+limites
+limpa
+limpeza
+linda
+linguagem
+linha
+linhas
+lista
+literatura
+litoral
+litros
+livre
+livres
+livro
+livros
+lixo
+lição
+locais
+local
+localidade
+localizada
+localizado
+logo
+loja
+lojas
+longa
+longe
+longo
+lua
+lucro
+lucros
+lugar
+lugares
+luta
+lutar
+luxo
+luz
+lá
+lê
+líder
+líderes
+língua
+líquido
+lógica
+madeira
+madrugada
+maio
+maior
+maiores
+maioria
+mais
+mal
+manda
+mandado
+mandar
+mandato
+mandatos
+mandou
+maneira
+manhã
+manifestação
+mano
+manter
+manteve
+mantido
+mantém
+manutenção
+mar
+marca
+marcada
+marcado
+marcador
+marcar
+marcas
+marcação
+marcou
+margem
+margens
+marido
+marinho
+marketing
+março
+mas
+masculino
+massa
+mata
+matar
+matemática
+materiais
+material
+mato
+matou
+matriz
+matéria
+matérias
+mau
+mauro
+maus
+me
+medalha
+mediante
+medida
+medidas
+medo
+meia
+meio
+meio-campo
+meios
+melhor
+melhora
+melhorar
+melhores
+melhoria
+melhorias
+melhorou
+membro
+membros
+memória
+menina
+meninas
+menino
+meninos
+menor
+menores
+menos
+mensagem
+mensagens
+mensais
+mensal
+mental
+mente
+mentira
+mercado
+mercadorias
+mercados
+merece
+merecem
+mesa
+meses
+mesma
+mesmas
+mesmo
+mesmos
+mestre
+meta
+metade
+metas
+metropolitana
+metros
+meu
+meus
+mil
+milhares
+milho
+milhão
+milhões
+militar
+militares
+mim
+mineiro
+minha
+minhas
+ministra
+ministros
+minuto
+minutos
+mirim
+missão
+mistura
+mobilização
+moda
+modalidade
+modelo
+modelos
+moderna
+moderno
+modo
+moeda
+momento
+momentos
+montagem
+montante
+montar
+monte
+mora
+moradia
+morador
+moradora
+moradores
+morais
+moral
+moram
+morar
+morava
+moro
+morre
+morrer
+morreram
+morreu
+morro
+morte
+mortes
+morto
+mortos
+mostra
+mostram
+mostrando
+mostrar
+mostrou
+motivo
+motivos
+moto
+motor
+motoristas
+motos
+movimentação
+movimento
+movimentos
+moça
+muda
+mudança
+mudanças
+mudar
+mudou
+muita
+muitas
+muito
+muitos
+mulher
+mulheres
+multa
+multas
+mundial
+mundo
+municipais
+municipal
+município
+municípios
+muro
+museu
+musical
+má
+máquina
+máquinas
+máxima
+máximo
+mãe
+mães
+mão
+mãos
+média
+médica
+médio
+mérito
+mês
+mídia
+mínima
+mínimo
+mínimos
+móveis
+móvel
+música
+músicas
+músicos
+na
+nacionais
+nacional
+nada
+namorada
+namorado
+naquela
+naquele
+nas
+nasceu
+nascido
+nascimento
+naturais
+natural
+naturalmente
+natureza
+nação
+nações
+necessidade
+necessidades
+necessita
+necessária
+necessárias
+necessário
+necessários
+nega
+negar
+negativa
+negativo
+negociar
+negociação
+negociações
+negou
+negra
+negro
+negros
+negócio
+negócios
+nela
+nele
+nem
+nenhum
+nenhuma
+nessa
+nessas
+nesse
+nesses
+nesta
+neste
+nestes
+neto
+news
+ninguém
+nisso
+no
+nobre
+noite
+noites
+nome
+nomes
+norma
+normal
+normalmente
+normas
+norte
+nos
+nossa
+nossas
+nosso
+nossos
+nota
+notas
+notícia
+notícias
+nova
+novamente
+novas
+nove
+novembro
+novidade
+novidades
+novo
+novos
+num
+numa
+nunca
+não
+né
+níveis
+nível
+nós
+núcleo
+número
+números
+o
+objetivo
+objetivos
+objeto
+objetos
+obra
+obras
+obrigado
+obrigados
+obrigação
+observa
+observar
+observou
+obter
+obteve
+ocasião
+ocorre
+ocorrem
+ocorrer
+ocorreram
+ocorreu
+ocorrido
+ocorrência
+ocorrências
+ocupa
+ocupar
+ocupação
+oeste
+oferece
+oferecem
+oferecer
+oferecido
+oferecidos
+oferta
+oficiais
+oficial
+oficialmente
+oficina
+oficinas
+ofício
+oito
+olha
+olhando
+olhar
+olho
+olhos
+oliveira
+olímpico
+onda
+onde
+ong
+online
+ontem
+operação
+operações
+opinião
+opiniões
+oportunidade
+oportunidades
+optar
+opção
+opções
+ora
+ordem
+organismo
+organizada
+organizado
+organizar
+organização
+organizações
+orientação
+origem
+original
+orçamento
+os
+ou
+ouro
+outra
+outras
+outro
+outros
+outubro
+ouvi
+ouvido
+ouvidos
+ouvir
+ouviu
+paciente
+pacientes
+paciência
+pacote
+padrão
+padrões
+paga
+pagam
+pagamento
+pagamentos
+pagando
+pagar
+pago
+pagos
+pagou
+pai
+paixão
+palanque
+palavra
+palavras
+palco
+palestra
+palestras
+palácio
+papai
+papel
+papéis
+par
+para
+parabéns
+parada
+parado
+paralisação
+paranaense
+parar
+parceiro
+parceiros
+parcela
+parceria
+parcerias
+parece
+parecem
+parecer
+parecia
+paredes
+parentes
+parlamentar
+parlamentares
+parlamento
+parou
+parque
+parte
+partes
+participa
+participam
+participantes
+participar
+participaram
+participação
+participou
+particular
+particulares
+partida
+partidas
+partido
+partidos
+partidária
+partir
+partiu
+passa
+passada
+passado
+passageiros
+passagem
+passagens
+passam
+passando
+passar
+passaram
+passará
+passava
+passe
+passei
+passeio
+passo
+passo fundo
+passos
+passou
+pasta
+patamar
+patrimônio
+pau
+paula
+paulistas
+pauta
+pavimentação
+paz
+país
+países
+pede
+pedido
+pedidos
+pedindo
+pedir
+pediu
+pedra
+pedras
+pega
+pegar
+pegou
+peito
+peixe
+peixes
+pela
+pelas
+pele
+pelo
+pelos
+pena
+penal
+pensa
+pensam
+pensamento
+pensamentos
+pensando
+pensar
+pensei
+penso
+pensou
+pensão
+pequena
+pequenas
+pequeno
+pequenos
+perante
+percebe
+perceber
+percebeu
+percentual
+percurso
+perda
+perdas
+perde
+perdendo
+perder
+perderam
+perdeu
+perdido
+perfeito
+perfil
+pergunta
+perguntar
+perguntas
+perguntou
+perigo
+perigoso
+permanece
+permanecer
+permaneceu
+permanente
+permanência
+permite
+permitido
+permitir
+permitiu
+perna
+pernas
+personagem
+personagens
+personalidade
+perspectiva
+pertence
+perto
+período
+períodos
+pesado
+pesca
+peso
+pesquisa
+pesquisadores
+pesquisas
+pessoa
+pessoais
+pessoal
+pessoalmente
+pessoas
+peça
+peças
+piloto
+pilotos
+pintura
+pior
+piores
+piso
+pista
+placa
+placas
+planejamento
+planeta
+plano
+planos
+planta
+plantas
+plantio
+plantão
+plateia
+pleito
+plena
+pleno
+plenário
+plástico
+pneus
+pobre
+pobres
+pobreza
+pode
+pode-se
+podem
+podemos
+podendo
+poder
+poderes
+poderia
+poderiam
+poderá
+poderão
+podia
+poeta
+pois
+policiais
+policial
+politicamente
+polêmica
+políticas
+político
+políticos
+ponta
+ponte
+ponto
+pontos
+popular
+populares
+população
+por
+porque
+porta
+portal
+portanto
+portas
+porte
+porto
+português
+porém
+posicionamento
+positiva
+positivo
+positivos
+posição
+posições
+possa
+possam
+posse
+possibilidade
+possibilidades
+posso
+possuem
+possui
+possíveis
+possível
+posteriormente
+posto
+postos
+postura
+potencial
+pouca
+poucas
+pouco
+poucos
+povo
+povos
+pps
+pq
+pra
+praia
+praias
+prata
+praticamente
+praticar
+prato
+pratos
+prazer
+prazo
+prazos
+praça
+praças
+precisa
+precisam
+precisamos
+precisar
+precisava
+preciso
+precisou
+preconceito
+preencher
+prefeita
+prefeitos
+prefeituras
+prefere
+preferiu
+preferência
+prejudicar
+prejuízo
+prejuízos
+premiação
+preocupa
+preocupado
+preocupar
+preocupação
+prepara
+preparado
+preparados
+preparar
+preparação
+presa
+presente
+presentes
+presença
+preservar
+preservação
+presidencial
+presidente
+presidentes
+presidência
+preso
+presos
+pressão
+prestar
+prestação
+presídio
+preta
+pretende
+preto
+prevenção
+previdência
+prevista
+previstas
+previsto
+previstos
+previsão
+prevê
+preço
+preços
+primavera
+primeira
+primeiras
+primeiro
+primeiros
+principais
+principal
+principalmente
+princípio
+princípios
+prioridade
+prioridades
+prisão
+privada
+privado
+pro
+problema
+problemas
+procedimento
+procedimentos
+processo
+processos
+procura
+procurado
+procurador
+procuram
+procurando
+procurar
+procure
+procurou
+produtividade
+produto
+produtor
+produtores
+produtos
+produz
+produzido
+produzir
+produção
+professor
+professora
+profissionais
+profissional
+profissão
+profunda
+programa
+programas
+programação
+progresso
+proibido
+projeto
+projetos
+prol
+promessa
+promessas
+promete
+prometeu
+promotor
+promove
+promover
+promovido
+promoção
+pronta
+pronto
+propaganda
+proposta
+propostas
+propriedade
+propriedades
+proprietário
+proprietários
+propósito
+propõe
+proteger
+protesto
+proteção
+prova
+provar
+provas
+provavelmente
+providências
+provisória
+provocar
+provocou
+provável
+proximidades
+prudente
+prática
+práticas
+pré-candidato
+prédio
+prédios
+prévia
+prêmio
+prêmios
+própria
+próprias
+próprio
+próprios
+próxima
+próximas
+próximo
+próximos
+publicada
+publicado
+publicação
+publicidade
+pudesse
+punição
+pura
+página
+páginas
+pátio
+pão
+pé
+pés
+pênalti
+pólo
+pública
+públicas
+público
+públicos
+quadra
+quadrados
+quadrilha
+quadro
+quadros
+quais
+qual
+qualidade
+qualificação
+qualquer
+quando
+quantas
+quantia
+quantidade
+quanto
+quantos
+quarta
+quarta-feira
+quarto
+quase
+quatro
+que
+quebra
+quebrar
+queda
+queira
+quem
+quente
+quer
+querem
+queremos
+querendo
+querer
+queria
+queriam
+querido
+quero
+questionado
+questão
+questões
+quilos
+quilômetros
+quinta
+quinta-feira
+quinto
+quis
+quiser
+rainha
+ramo
+ranking
+rapaz
+rapidamente
+razão
+razões
+reais
+reajuste
+real
+realidade
+realiza
+realizada
+realizadas
+realizado
+realizados
+realizando
+realizar
+realização
+realizou
+realmente
+reação
+rebaixamento
+recado
+recebe
+recebem
+recebendo
+receber
+receberam
+receberá
+recebeu
+recebi
+recebido
+receita
+receitas
+recente
+recentemente
+recentes
+reclama
+reclamar
+reclamação
+reclamações
+reclamou
+reconhece
+reconhecer
+reconhecido
+reconhecimento
+recorde
+recorrer
+recuperar
+recuperação
+recurso
+recursos
+redação
+rede
+redes
+redonda
+redor
+reduzir
+redução
+reeleito
+reeleição
+refere
+referente
+referência
+reflete
+refletir
+reflexão
+reforma
+reformas
+reforçar
+reforço
+regime
+regionais
+regional
+registrada
+registrado
+registrados
+registrar
+registro
+registros
+registrou
+região
+regiões
+regra
+regras
+regular
+rei
+reino
+reivindicações
+relacionados
+relacionamento
+relacionamentos
+relata
+relator
+relatou
+relatório
+relação
+relações
+religioso
+remuneração
+remédio
+remédios
+renda
+rendimento
+renovação
+repasse
+repente
+repercussão
+repetir
+reportagem
+representa
+representam
+representante
+representantes
+representar
+representação
+repórter
+república
+reserva
+reservas
+resgate
+residência
+residências
+resistência
+resolução
+resolve
+resolver
+resolveu
+resolvido
+respectivamente
+respeitar
+respeito
+responde
+responder
+respondeu
+responsabilidade
+responsáveis
+responsável
+resposta
+respostas
+ressalta
+ressaltar
+ressaltou
+resta
+restante
+restaurante
+restaurantes
+resto
+resultado
+resultados
+retirada
+retirar
+retornar
+retorno
+reunir
+reuniu
+reunião
+reuniões
+revela
+revelou
+rever
+reverter
+revista
+revistas
+revisão
+revolução
+reúne
+rica
+rico
+ricos
+rio
+rio de janeiro
+rios
+riqueza
+risco
+riscos
+ritmo
+rival
+rock
+rodada
+rodadas
+rodovia
+rodovias
+rodoviária
+romance
+rosto
+roteiro
+rotina
+roubo
+roupa
+roupas
+rua
+ruas
+rubro-negro
+ruim
+rumo
+rurais
+rural
+rádio
+rápida
+rápido
+sabe
+sabedoria
+sabem
+sabemos
+sabendo
+saber
+sabia
+saco
+saem
+sai
+saia
+saiba
+saindo
+sair
+saiu
+sala
+salarial
+salas
+saldo
+salto
+salvar
+salário
+salários
+salão
+saneamento
+sangue
+santista
+satisfação
+satisfeito
+saudade
+saudável
+saída
+saíram
+se
+secretaria
+secretarias
+secretário
+secretários
+sede
+segmento
+segmentos
+segredo
+segue
+seguem
+seguida
+seguido
+seguindo
+seguinte
+seguintes
+seguir
+seguiu
+segunda
+segunda-feira
+segundo
+segundos
+segura
+segurança
+segurar
+seguro
+sei
+seis
+seja
+sejam
+sejamos
+seleção
+sem
+semana
+semanas
+semelhante
+semelhantes
+semestre
+seminário
+sempre
+senado
+senador
+senadora
+senadores
+sendo
+senhor
+senhora
+senhores
+sensação
+senso
+sente
+sentença
+sentido
+sentimento
+sentimentos
+sentindo
+sentir
+sentiu
+senão
+sequer
+sequência
+ser
+serei
+serem
+seremos
+seres
+seria
+seriam
+serve
+servidor
+servidores
+servir
+serviu
+serviço
+serviços
+será
+serão
+seríamos
+sessão
+sessões
+sete
+setembro
+setor
+setores
+seu
+seus
+sexo
+sexta
+sexta-feira
+sexual
+shopping
+show
+shows
+si
+sido
+sigilo
+sigla
+significa
+significado
+silêncio
+sim
+simples
+simplesmente
+sinais
+sinal
+sinto
+sintomas
+sistema
+sistemas
+site
+sites
+situação
+situações
+sob
+sobe
+sobra
+sobre
+sobretudo
+sobrinho
+sociais
+social
+socorro
+sofre
+sofrem
+sofrendo
+sofrer
+sofreu
+sofrimento
+sol
+soldados
+solenidade
+solicitação
+solicitou
+solidariedade
+solo
+solução
+soluções
+som
+soma
+sombra
+somente
+somos
+sonho
+sonhos
+sono
+sorriso
+sorte
+sorteio
+sou
+soube
+sousa
+sozinha
+sozinho
+sua
+suas
+subir
+subiu
+substituir
+substituição
+sucesso
+sucessão
+sudeste
+suficiente
+suficientes
+sugere
+sugestão
+sugestões
+sujeito
+sul
+super
+superar
+superintendente
+superior
+superiores
+supermercado
+superou
+suplente
+suporte
+suposto
+supremo
+surge
+surgiu
+surpresa
+suspeita
+suspeito
+suspeitos
+suspensão
+sábado
+sábados
+são
+século
+série
+sério
+sítio
+só
+sócios
+tabela
+tais
+tal
+talento
+talvez
+tamanho
+também
+tanta
+tantas
+tanto
+tantos
+taques
+tarde
+tarefa
+tarifa
+taxa
+taxas
+taça
+te
+teatro
+tecnologia
+tecnologias
+tela
+telefone
+telefones
+televisão
+tem
+tema
+temas
+temos
+temperatura
+tempo
+temporada
+tempos
+tende
+tendo
+tendência
+tenha
+tenham
+tenhamos
+tenho
+tenta
+tentam
+tentando
+tentar
+tentaram
+tentativa
+tentou
+teoria
+ter
+terceira
+terceiro
+terei
+terem
+teremos
+teria
+teriam
+termina
+terminal
+terminar
+terminou
+termo
+termos
+terra
+terras
+terreno
+terrenos
+território
+terá
+terão
+terça
+terça-feira
+teríamos
+tese
+tesouro
+teste
+testemunhas
+testes
+teto
+teu
+teus
+teve
+texto
+textos
+the
+ti
+tido
+time
+times
+tinha
+tinham
+tio
+tipo
+tipos
+tira
+tirar
+tiro
+tiros
+tirou
+titular
+titulares
+tive
+tivemos
+tiver
+tivera
+tiveram
+tiverem
+tivermos
+tivesse
+tivessem
+tivéramos
+tivéssemos
+tocar
+tocou
+toda
+todas
+todo
+todos
+tom
+toma
+tomada
+tomadas
+tomando
+tomar
+tomou
+toneladas
+toque
+torcedor
+torcedores
+torcida
+torna
+tornando
+tornar
+torneio
+torno
+tornou
+tornou-se
+torres
+total
+totalmente
+trabalha
+trabalhador
+trabalhadores
+trabalham
+trabalhando
+trabalhar
+trabalhava
+trabalho
+trabalhos
+trabalhou
+tradicionais
+tradicional
+tradição
+traficantes
+tragédia
+trajetória
+tranquilidade
+transferência
+transformar
+transformação
+transformou
+transição
+transmissão
+transparência
+transporte
+transportes
+trata
+trata-se
+tratado
+tratamento
+tratar
+trave
+travessão
+traz
+trazendo
+trazer
+trecho
+trechos
+treinador
+treinamento
+treino
+trem
+tribuna
+tribunal
+tributária
+trimestre
+trinta
+trio
+triste
+tristeza
+troca
+trocar
+troféu
+trouxe
+tráfego
+trás
+três
+tu
+tua
+tuas
+tudo
+turismo
+turistas
+turma
+tv
+twitter
+tá
+tão
+técnica
+técnicas
+técnico
+técnicos
+tém
+término
+tênis
+tínhamos
+título
+títulos
+um
+uma
+umas
+unidade
+unidades
+unidos
+unir
+universidade
+universidades
+universitário
+universo
+união
+uns
+urbana
+urbano
+urgência
+urnas
+usada
+usado
+usados
+usam
+usando
+usar
+usina
+usinas
+uso
+usou
+usuário
+usuários
+utilizada
+utilizado
+utilizados
+utilizar
+utilização
+vacinação
+vaga
+vagas
+vai
+vale
+valer
+valor
+valores
+valorizar
+valorização
+vamos
+vantagem
+vantagens
+vara
+variação
+vc
+vcs
+veio
+vejo
+velha
+velho
+velhos
+velocidade
+vem
+vemos
+vence
+vencedor
+vencer
+venceu
+venda
+vendas
+vender
+vendidos
+vendo
+venha
+vento
+ver
+vera
+verba
+verbas
+verdade
+verdadeira
+verdadeiro
+verdadeiros
+verde
+vereador
+vereadora
+vereadores
+vergonha
+verificar
+vermelha
+vermelho
+versão
+verão
+vez
+vezes
+veículo
+veículos
+vi
+via
+viagem
+viagens
+viajar
+vias
+vice
+vice-governador
+vice-prefeito
+vice-presidente
+vida
+vidas
+vieram
+vigor
+vila
+vinda
+vindo
+vinha
+vinho
+vinte
+violência
+vir
+vira
+virada
+viram
+virar
+virou
+virtude
+visa
+visando
+visita
+visitantes
+visitar
+visitas
+vista
+visto
+visual
+visão
+vitória
+vitórias
+viu
+viva
+vive
+vivem
+vivemos
+vivendo
+viver
+viveu
+vivo
+vizinho
+vizinhos
+você
+vocês
+volante
+volta
+voltada
+voltado
+voltam
+voltando
+voltar
+voltaram
+voltou
+volume
+voluntários
+vontade
+voos
+vos
+votado
+votar
+votação
+voto
+votos
+votou
+vou
+voz
+vá
+várias
+vários
+várzea
+vão
+véspera
+vê
+vídeo
+vídeos
+vítima
+vítimas
+vôo
+zagueiro
+zero
+zona
+à
+às
+água
+águas
+árbitro
+área
+áreas
+árvore
+árvores
+época
+éramos
+êxito
+índia
+índice
+índices
+óleo
+órgão
+órgãos
+ótima
+ótimo
+ônibus
+última
+últimas
+último
+últimos
+única
+único
+útil
diff --git a/apps/common/src/python/mediawords/languages/ro/ro_stop_words.txt b/apps/common/src/python/mediawords/languages/ro/ro_stop_words.txt
old mode 100755
new mode 100644
index 2afa1eb3de..a426c284e6
--- a/apps/common/src/python/mediawords/languages/ro/ro_stop_words.txt
+++ b/apps/common/src/python/mediawords/languages/ro/ro_stop_words.txt
@@ -1,440 +1,591 @@
+# A Romanian stop word list.
+# Sources:
 #
-# This is a stop word list for the Romanian language.
-#
-# Source: http://snowball.tartarus.org/otherapps/romanian/intro.html (romanian2.tgz)
-#
-
- # A Romanian stop word list. Comments begin with vertical bar. Each stop
- # word is at the start of a line.
-
- # Many of the forms below are quite rare but included for completeness.
+#   http://snowball.tartarus.org/otherapps/romanian/intro.html (romanian2.tgz)
+#   https://github.com/stopwords-iso/stopwords-ro/blob/master/stopwords-ro.txt
+# (Lightly edited to remove words in the original lists that are actually meaningful)
 
-           # ARTICLE
-             # Indefinite article
-o              # a
-unui
-unei
-unor
-nişte         # some
-             #  Demonstrative/adjectival article
-cel
-cea
-cei
-cele
-celui
-celei
-celor
-             #  Possessive / genitival article
-al             # of
 a
-ai
-ale
-           # PREPOSITION AND ADVERB
-pe             # on
-la             # at
-în            # in
-fără         # without
-sub            # under
-despre         # about
-către         # to
-cu             # with
-de             # from
-din            # on
-lângă        # by
-pentru         # for
-peste          # over
-spre           # to
-prin           # through
-dintre         # between
-printre        # among
-până         # until
-după          # after
-înspre        # towards
-ca             # as
-           # ADJECTIVE
-mai            # more
-decât         # than
-cum            # how
-foarte         # very
-mult           # much
-multă
-mulţi
-multe
-puţin         # little
-puţină
-puţini
-puţine
-destul         # enough
-destulă
-destui
-destule
-           # PRONOUN
-             # Personal pronoun
-eu             # I
-tu             # you
-el             # he
-ea             # she
-noi            # we
-voi            # you
-ei             # they
-ele            # they
-mie            # me
-îmi
-mi
-mine
-mă
-m
-ţie           # you
-îţi
-ţi
-tine
-te
-lui            # him
-îl
-l
-îi
-i
-nouă          # us
-ne
-ni
-vouă          # you
-vă
-vi
-v
-lor            # them
-le
-li
-             # Pronoun of politeness
-dumneavoastră # you
-             # Reflexive pronoun
-se             # himself
-îşi
-sie
-sieşi
-sine
-             # Pronoun of reinforcement
-însumi        # myself
-însămi
-însuţi       # youself
-însăţi
-însuşi       # himself
-însăşi      # herself
-înşine       # ourselves
-însene
-înşivă      # youselves
-însevă
-înşişi      # themselves
-înseşi
-însele
-             # Possessive pronoun
-meu            # mine
-mea
-mei
-mele
-tău           # yours
-ta
-tăi
-tale
-său           # his
-sa
-săi
-sale
-nostru         # ours
-noastră
-noştri
-noastre
-vostru         # yours
-voastră
-voştri
-voastre
-             # Demonstrative pronoun
-acesta         # this
-ăsta
-aceştia
-ăştia
-acestuia
-ăstuia
-acestora
-ăstora
+abia
+acea
 aceasta
-asta
-acestea
-astea
-acesteia
-ăsteia
-acest
-aceşti
-acestui
-acestor
 această
-aceste
-acestei
-acela          # that
-ăla
-acelui
-ăluia
-aceia
-ăia
-acelora
-ălora
 aceea
-aia
-acelea
-alea
-aceleia
-ăleia
-acel
+aceeasi
+aceeaşi
 acei
-acelor
-acea
-acele
-acelei
-acelaşi       # the same
+aceia
 aceiaşi
-aceeaşi
+acel
+acela
+acelasi
+acelaşi
+acele
+acelea
 aceleaşi
-aceluiaşi
-aceloraşi
+acelei
+aceleia
 aceleiaşi
-celălalt      # the other
-celuilalt
-ceilalţi
-celorlalţi
-cealaltă
-celeilalte
-celelalte
-celorlalte
-             # Interrogative pronoun
-ce             # what
-cine           # who
-cui            # whom
-care           # which, what
-cărui
-cărei
-căror
-unde           # where
-când          # when
-             # Indefinite pronoun
-cineva         # someone
-cuiva
-altcineva      # someone else
-altcuiva
-oricine        # anyone
-oricui
-orice          # anything
-unul           # one
-una
-unii
-unele
-unuia
-uneia
-unora
-altul          # other
-alta
-alţii
-altele
+acelora
+aceloraşi
+acelui
+aceluiaşi
+acest
+acesta
+aceste
+acestea
+acestei
+acesteia
+acestia
+acestor
+acestora
+acestui
+acestuia
+aceşti
+aceştia
+acolo
+acord
+acum
+adica
+ai
+aia
+aibă
+aici
+aiurea
+al
+ala
+alaturi
+ale
+alea
 alt
-altă
-alţi
+alta
+altceva
+altcineva
+altcuiva
 alte
-altuia
+altei
 alteia
+altele
+altfel
+alti
+altii
+altor
 altora
 altui
-altei
-altor
-vreunul        # somebody, some (of them)
-vreuna
-vreunii
-vreunele
-vreun
-vreo
-vreunuia
-vreuneia
-vreunora
-vreunui
-vreunei
-vreunor
-oricare        # anyone
-oricăruia
-oricăreia
-oricărora
-oricărui
-oricărei
-oricăror
-fiecare        # everyone
-fiecăruia
-fiecăreia
-fiecărui
-fiecărei
-cât           # how, how many
-câtă
-câţi
-câte
-câtora
-câtor
-atât          # this much
+altuia
+altul
+altă
+alţi
+alţii
+am
+ambele
+ambelor
+ambii
+ambilor
+amândoi
+amândouă
+amânduror
+amândurora
+anume
+apoi
+aproape
+ar
+are
+as
+asa
+asemenea
+asta
+astazi
+astea
+astfel
+astăzi
+asupra
+atare
+atat
+atata
+atatea
+atatia
+ati
+atit
+atita
+atitea
+atitia
+atunci
+atât
 atâta
-atâţi
-atâţia
 atâtea
-atâtora
 atâtor
-oricât        # however much
-oricâtă
-oricâţi
-oricâte
-oricâtora
-oricâtor
-câtva         # some
-câţiva
+atâtora
+atâţi
+atâţia
+au
+avea
+aveai
+aveam
+aveau
+aveaţi
+avem
+aveţi
+avut
+azi
+aş
+aşa
+aşadar
+aţi
+b
+ba
+bine
+bucur
+bună
+c
+ca
+cam
+cand
+capat
+care
+careia
+carora
+caruia
+cat
+catre
+caut
+ce
+cea
+cealaltă
+ceea
+cei
+ceilalti
+ceilalţi
+cel
+cele
+celei
+celeilalte
+celelalte
+celor
+celorlalte
+celorlalţi
+celui
+celuilalt
+celălalt
+ceva
+chiar
+ci
+cinci
+cind
+cine
+cineva
+cit
+cita
+cite
+citeva
+citi
+citiva
+conform
+contra
+cu
+cui
+cuiva
+cum
+cumva
+curând
+curînd
+cutare
+când
+cât
+câte
 câteva
+câtor
+câtora
 câtorva
-tot            # all
-toată
-toţi
-toate
-tuturor
-totul
-cutare         # that
-oarecare       # some
-ceva           # something
-altceva        # something else
-             # Negative pronoun
-nimeni         # nobody
-nimănui
-nimic          # nothing
-           # NUMERAL
-             # Cardinal numeral
-unu            # one
-doi            # two
+câtva
+câtă
+câţi
+câţiva
+cînd
+cît
+cîte
+cîtva
+cîţi
+că
+căci
+cărei
+căror
+cărui
+către
+d
+da
+daca
+dacă
+dar
+dat
+datorită
+dată
+dau
+de
+deasupra
+deci
+decit
+decât
+degraba
+deja
+deoarece
+departe
+desi
+despre
+destui
+destul
+destule
+destulă
+deşi
+din
+dinaintea
+dintr
+dintr-
+dintre
+doar
+doi
+doilea
+doime
 doua
-trei           # three
-patru          # four
-cinci          # five
-şase          # six
-şapte         # seven
-opt            # eight
-noua           # nine
-zece           # ten
-             # Fractional numeral
-doime          # half
-treime         # third
-sutime         # hundredth
-             # Collective numeral
-amândoi       # both
-amândouă
-amândurora
-amânduror
-ambii
-ambele
-ambilor
-ambelor
-             # Multiplicative numeral
-îndoit        # double
-întreit       # threefold
-însutit       # hundred-fold
-             # Ordinal numeral
-întâiul      # the first
-întâia
-primul         # former
-prima
-primii
-primele
-primului
-primei
-primilor
-primelor
-           # VERB
-             # To be
-sunt           # (I) am
-s
-eşti          # (you) are
-este           # (he/she) is
+două
+drept
+dumneavoastră
+dupa
+după
+dă
 e
-suntem         # (we) are
-sunteţi       # (you) are
-eram           # (I) were
-erai           # (you) were
-era            # (he) was
-eraţi         # (you) were
-erau           # (they) were
-fiu            # be
-fii
+ea
+ei
+el
+ele
+era
+erai
+eram
+erau
+este
+eu
+exact
+există
+eşti
+f
+face
+fara
+fata
+faţă
+fel
+fi
 fie
+fiecare
+fiecărei
+fiecăreia
+fiecărui
+fiecăruia
+fii
+fiind
 fim
+fiu
 fiţi
-fi
-fiind          # being
-fost           # been
-             # Auxiliary verb
-am             # to have  - all forms
-aţi
-au
-are
-avem
-aveţi
-aveam
-aveai
-avea
-aveaţi
-aveau
-aş
-ar
-oi             # to will
+foarte
+făcut
+g
+h
+i
+ia
+iar
+ieri
+ii
+il
+imi
+in
+inainte
+inapoi
+inca
+incit
+insa
+intr
+intre
+isi
+iti
+j
+k
+l
+la
+le
+li
+lor
+lui
+lângă
+lîngă
+m
+ma
+mai
+mare
+mea
+mei
+mele
+mereu
+meu
+mi
+mie
+mine
+mod
+mult
+multa
+multe
+multi
+multă
+mulţi
+mulţumesc
+mâine
+mîine
+mă
+n
+ne
+nevoie
+ni
+nici
+niciodata
+nicăieri
+nimeni
+nimeri
+nimic
+nimănui
+niste
+nişte
+noastre
+noastră
+noi
+noroc
+nostri
+nostru
+nou
+noua
+nouă
+noştri
+nu
+numai
+o
+oarecare
+oi
 om
-oţi
+opt
 or
-vei
+ori
+oricare
+orice
+oricine
+oricui
+oricum
+oricând
+oricât
+oricâte
+oricâtor
+oricâtora
+oricâtă
+oricâţi
+oricînd
+oricît
+oricărei
+oricăreia
+oricăror
+oricărora
+oricărui
+oricăruia
+oriunde
+oţi
+p
+pai
+parte
+patra
+patru
+patrulea
+pe
+pentru
+peste
+pic
+pina
+plus
+poate
+pot
+prea
+prima
+primei
+primele
+primelor
+primii
+primilor
+primul
+primului
+prin
+printr-
+printre
+putea
+putini
+puţin
+puţina
+puţine
+puţini
+puţină
+până
+pînă
+r
+rog
+s
+sa
+sa-mi
+sa-ti
+sai
+sale
+sau
+se
+si
+sie
+sieşi
+sine
+sint
+sintem
+spate
+spre
+spune
+spus
+sub
+sunt
+suntem
+sunteţi
+sus
+sutime
+sută
+sînt
+sîntem
+sînteţi
+să
+săi
+său
+t
+ta
+tale
+te
+ti
+timp
+tine
+toata
+toate
+toată
+tocmai
+tot
+toti
+totul
+totusi
+totuşi
+toţi
+trebuie
+trei
+treia
+treilea
+treime
+tu
+tuturor
+tăi
+tău
+u
+ul
+un
+una
+unde
+undeva
+unei
+uneia
+unele
+uneori
+unii
+unor
+unora
+unu
+unui
+unuia
+unul
+v
 va
-vom
+vei
 veţi
+vi
+voastre
+voastră
+voi
+vom
 vor
-           # CONJUNCTION
-şi            # and
-nici           # neither
-dar            # but
+vostru
+vouă
+voştri
+vreme
+vreo
+vreun
+vreuna
+vreunei
+vreuneia
+vreunele
+vreunii
+vreunor
+vreunora
+vreunui
+vreunuia
+vreunul
+vă
+x
+z
+zece
+zero
+zi
+zice
+îi
+îl
+îmi
+împotriva
+în
+înainte
+înaintea
+încotro
+încât
+încît
+încă
+îndoit
+însele
+însene
+însevă
+înseşi
+înspre
+însumi
+însutit
+însuşi
+însuţi
 însă
-iar            # and, but, while, again
-ci             # but, so that
-sau            # or
-ori
-deci           # so
-aşadar
-încât        # so that
-aşa           # such
-deşi          # although
-totuşi        # though
-dacă          # if
-atunci         # then
-că            # that
-           # OTHER
-nu             # no
-
- # The following is a ranked list (commonest to rarest) of stopwords
- # deriving from a large sample of text.
-
-poate          # maybe
-ieri           # yesterday
-mare           # big
-doar           # just
-trebuie        # must
-spus           # said
-acum           # now
-putea          # can
-chiar          # even
-face           # do
-astfel         # such
-pot            # can
-făcut         # done
-avut           # had
-parte          # part
-spune          # says
-bine           # good
-faţă         # front
-există        # exists
-încă         # still
-numai          # only
-dat            # given
-asupra         # on
-aproape        # near
+însămi
+însăşi
+însăţi
+între
+întreit
+întrucât
+întrucît
+întâia
+întâiul
+înşine
+înşivă
+înşişi
+îşi
+îţi
+ăia
+ăla
+ălea
+ăleia
+ălora
+ăluia
+ăsta
+ăstea
+ăstuia
+ăştia
+şapte
+şase
+şi
+ştiu
+ţi
+ţie
\ No newline at end of file
diff --git a/apps/common/src/python/mediawords/languages/ro/ro_stop_words_old.txt b/apps/common/src/python/mediawords/languages/ro/ro_stop_words_old.txt
new file mode 100755
index 0000000000..2afa1eb3de
--- /dev/null
+++ b/apps/common/src/python/mediawords/languages/ro/ro_stop_words_old.txt
@@ -0,0 +1,440 @@
+#
+# This is a stop word list for the Romanian language.
+#
+# Source: http://snowball.tartarus.org/otherapps/romanian/intro.html (romanian2.tgz)
+#
+
+ # A Romanian stop word list. Comments begin with a hash sign (#). Each stop
+ # word is at the start of a line.
+
+ # Many of the forms below are quite rare but included for completeness.
+
+           # ARTICLE
+             # Indefinite article
+o              # a
+unui
+unei
+unor
+nişte         # some
+             #  Demonstrative/adjectival article
+cel
+cea
+cei
+cele
+celui
+celei
+celor
+             #  Possessive / genitival article
+al             # of
+a
+ai
+ale
+           # PREPOSITION AND ADVERB
+pe             # on
+la             # at
+în            # in
+fără         # without
+sub            # under
+despre         # about
+către         # to
+cu             # with
+de             # from
+din            # on
+lângă        # by
+pentru         # for
+peste          # over
+spre           # to
+prin           # through
+dintre         # between
+printre        # among
+până         # until
+după          # after
+înspre        # towards
+ca             # as
+           # ADJECTIVE
+mai            # more
+decât         # than
+cum            # how
+foarte         # very
+mult           # much
+multă
+mulţi
+multe
+puţin         # little
+puţină
+puţini
+puţine
+destul         # enough
+destulă
+destui
+destule
+           # PRONOUN
+             # Personal pronoun
+eu             # I
+tu             # you
+el             # he
+ea             # she
+noi            # we
+voi            # you
+ei             # they
+ele            # they
+mie            # me
+îmi
+mi
+mine
+mă
+m
+ţie           # you
+îţi
+ţi
+tine
+te
+lui            # him
+îl
+l
+îi
+i
+nouă          # us
+ne
+ni
+vouă          # you
+vă
+vi
+v
+lor            # them
+le
+li
+             # Pronoun of politeness
+dumneavoastră # you
+             # Reflexive pronoun
+se             # himself
+îşi
+sie
+sieşi
+sine
+             # Pronoun of reinforcement
+însumi        # myself
+însămi
+însuţi       # yourself
+însăţi
+însuşi       # himself
+însăşi      # herself
+înşine       # ourselves
+însene
+înşivă      # youselves
+însevă
+înşişi      # themselves
+înseşi
+însele
+             # Possessive pronoun
+meu            # mine
+mea
+mei
+mele
+tău           # yours
+ta
+tăi
+tale
+său           # his
+sa
+săi
+sale
+nostru         # ours
+noastră
+noştri
+noastre
+vostru         # yours
+voastră
+voştri
+voastre
+             # Demonstrative pronoun
+acesta         # this
+ăsta
+aceştia
+ăştia
+acestuia
+ăstuia
+acestora
+ăstora
+aceasta
+asta
+acestea
+astea
+acesteia
+ăsteia
+acest
+aceşti
+acestui
+acestor
+această
+aceste
+acestei
+acela          # that
+ăla
+acelui
+ăluia
+aceia
+ăia
+acelora
+ălora
+aceea
+aia
+acelea
+alea
+aceleia
+ăleia
+acel
+acei
+acelor
+acea
+acele
+acelei
+acelaşi       # the same
+aceiaşi
+aceeaşi
+aceleaşi
+aceluiaşi
+aceloraşi
+aceleiaşi
+celălalt      # the other
+celuilalt
+ceilalţi
+celorlalţi
+cealaltă
+celeilalte
+celelalte
+celorlalte
+             # Interrogative pronoun
+ce             # what
+cine           # who
+cui            # whom
+care           # which, what
+cărui
+cărei
+căror
+unde           # where
+când          # when
+             # Indefinite pronoun
+cineva         # someone
+cuiva
+altcineva      # someone else
+altcuiva
+oricine        # anyone
+oricui
+orice          # anything
+unul           # one
+una
+unii
+unele
+unuia
+uneia
+unora
+altul          # other
+alta
+alţii
+altele
+alt
+altă
+alţi
+alte
+altuia
+alteia
+altora
+altui
+altei
+altor
+vreunul        # somebody, some (of them)
+vreuna
+vreunii
+vreunele
+vreun
+vreo
+vreunuia
+vreuneia
+vreunora
+vreunui
+vreunei
+vreunor
+oricare        # anyone
+oricăruia
+oricăreia
+oricărora
+oricărui
+oricărei
+oricăror
+fiecare        # everyone
+fiecăruia
+fiecăreia
+fiecărui
+fiecărei
+cât           # how, how many
+câtă
+câţi
+câte
+câtora
+câtor
+atât          # this much
+atâta
+atâţi
+atâţia
+atâtea
+atâtora
+atâtor
+oricât        # however much
+oricâtă
+oricâţi
+oricâte
+oricâtora
+oricâtor
+câtva         # some
+câţiva
+câteva
+câtorva
+tot            # all
+toată
+toţi
+toate
+tuturor
+totul
+cutare         # that
+oarecare       # some
+ceva           # something
+altceva        # something else
+             # Negative pronoun
+nimeni         # nobody
+nimănui
+nimic          # nothing
+           # NUMERAL
+             # Cardinal numeral
+unu            # one
+doi            # two
+doua
+trei           # three
+patru          # four
+cinci          # five
+şase          # six
+şapte         # seven
+opt            # eight
+noua           # nine
+zece           # ten
+             # Fractional numeral
+doime          # half
+treime         # third
+sutime         # hundredth
+             # Collective numeral
+amândoi       # both
+amândouă
+amândurora
+amânduror
+ambii
+ambele
+ambilor
+ambelor
+             # Multiplicative numeral
+îndoit        # double
+întreit       # threefold
+însutit       # hundred-fold
+             # Ordinal numeral
+întâiul      # the first
+întâia
+primul         # former
+prima
+primii
+primele
+primului
+primei
+primilor
+primelor
+           # VERB
+             # To be
+sunt           # (I) am
+s
+eşti          # (you) are
+este           # (he/she) is
+e
+suntem         # (we) are
+sunteţi       # (you) are
+eram           # (I) were
+erai           # (you) were
+era            # (he) was
+eraţi         # (you) were
+erau           # (they) were
+fiu            # be
+fii
+fie
+fim
+fiţi
+fi
+fiind          # being
+fost           # been
+             # Auxiliary verb
+am             # to have  - all forms
+aţi
+au
+are
+avem
+aveţi
+aveam
+aveai
+avea
+aveaţi
+aveau
+aş
+ar
+oi             # to will
+om
+oţi
+or
+vei
+va
+vom
+veţi
+vor
+           # CONJUNCTION
+şi            # and
+nici           # neither
+dar            # but
+însă
+iar            # and, but, while, again
+ci             # but, so that
+sau            # or
+ori
+deci           # so
+aşadar
+încât        # so that
+aşa           # such
+deşi          # although
+totuşi        # though
+dacă          # if
+atunci         # then
+că            # that
+           # OTHER
+nu             # no
+
+ # The following is a ranked list (commonest to rarest) of stopwords
+ # deriving from a large sample of text.
+
+poate          # maybe
+ieri           # yesterday
+mare           # big
+doar           # just
+trebuie        # must
+spus           # said
+acum           # now
+putea          # can
+chiar          # even
+face           # do
+astfel         # such
+pot            # can
+făcut         # done
+avut           # had
+parte          # part
+spune          # says
+bine           # good
+faţă         # front
+există        # exists
+încă         # still
+numai          # only
+dat            # given
+asupra         # on
+aproape        # near
diff --git a/apps/common/src/python/mediawords/languages/ru/ru_stop_words.txt b/apps/common/src/python/mediawords/languages/ru/ru_stop_words.txt
index e4a59dda4c..f4721e80e3 100644
--- a/apps/common/src/python/mediawords/languages/ru/ru_stop_words.txt
+++ b/apps/common/src/python/mediawords/languages/ru/ru_stop_words.txt
@@ -1,10 +1,13 @@
+# This is a stop word list for the Russian language.
 #
-# This is a "short" stop word list for the Russian language.
-#
+# Source:
+#   https://github.com/stopwords-iso/stopwords-ru/blob/master/stopwords-ru.txt
+# (Lightly edited to remove words in the original list that are actually meaningful)
 
 adriver
 amp
 bin
+c
 cgi
 href
 html
@@ -16,517 +19,456 @@ rnd
 sid
 style
 www
-а
 А
+а
 августа
-акций
-Александр
-Александра
-Алексей
-Анатолий
-Андрей
+алло
 АО
 апрель
 апреля
-Ассошиэйтед
 Б
-без
 Без
+без
+близко
 блог
-более
 Более
+более
 больше
-большинство
-большой
-Борис
+будем
 будет
+будете
+будешь
 будто
+буду
 будут
+будь
 бы
-бывшего
+бывает
 бывший
+бывь
 был
 была
 были
 было
 быстро
 быть
-в
 В
+в
+важная
+важное
+важные
+важный
 вам
+вами
 вас
 ваш
+ваша
+ваше
+ваши
+вверх
+вдали
 вдруг
-ведь
 Ведь
-века
-вести
+ведь
+везде
 весь
 весьма
 взгляд
 взять
+вид
 виде
+видел
+видеть
 видимо
-Виктор
-вице
 включая
-Владимира
 власть
 вместе
 вместо
-внимание
+вниз
+внизу
 вновь
-во
 Во
+во
 вовсе
-воды
-возможно
-возможности
-возможность
-войск
 вокруг
+вон
 вообще
 вопрос
-вопросы
-воскресение
-вот
+восемь
+восьмой
 Вот
+вот
 впервые
 вполне
 Впрочем
+впрочем
 времена
 времени
 время
 вроде
 вряд
-все
 Все
+все
+все еще
 всегда
-всего
 всей
 всем
+всеми
 всему
 всех
+всею
 встречи
 всю
+всюду
 вся
 всё
 вторая
 второй
-вы
 Вы
-выборах
+вы
 выше
-выяснилось
-г
 Г
+г
 где
-глава
-главе
-главного
 главное
 главный
 главным
-главы
+глаз
 го
 говорил
 говорит
-говорится
-говорить
 говоря
 говорят
 год
 года
 году
 годы
-город
+голова
 города
 городе
-градусов
-Грозном
 группа
 группы
-д
 Д
+д
 Да
 да
-давно
+давать
 дает
-даже
 Даже
+даже
 дал
+далекий
 далеко
 дальше
-данным
+даром
 дать
 два
 две
-движения
 двух
-действий
-действительно
-действия
+девятый
+девять
 декабря
-дел
-дела
 делаем
-делам
+делал
 делать
-деле
-дело
-Дело
-делу
+делаю
 день
-деньги
+десятый
 десять
-деятельности
-деятельность
-директора
-для
 Для
+для
 дней
 дни
 дня
 днях
-до
 До
-довольно
-документы
-долго
+до
 должен
 должна
 должно
 должны
-дом
-дома
-доме
-достаточно
-друг
-друга
+должный
+дорога
+другая
 другие
 другим
 других
+друго
 другое
 другой
 другом
-е
+думать
 Е
-его
+е
 Его
+его
 едва
 ее
 ей
 ему
-если
 Если
-естественно
-есть
+если
 Есть
-еще
+есть
 Еще
+еще
+ещё
+ею
+её
+ж
+ждать
 же
-женщин
-женщины
-жизни
-жизнь
-жителей
 жить
-за
 За
-завода
-закон
-зам
-заместитель
+за
+занят
 затем
+зато
+зачем
 заявил
-заявление
-здесь
 Здесь
-земли
+здесь
 знает
+знать
 значит
 знаю
 знают
-зрения
-и
 И
-игры
+и
 идет
-из
+иди
+идти
 Из
-Известий
-Известия
-Известиям
-известно
+из
 или
-Иллюстрация
 им
 имеет
+имел
 имени
-именно
 Именно
+именно
 иметь
 имеют
+ими
 имя
 иначе
-интервью
-интересы
-информацию
-история
+иногда
 ИТАР
 итоге
-их
 Их
+их
 июля
 июня
 й
-к
 К
+к
 каждая
 каждого
+каждое
+каждые
 каждый
 кажется
-как
 Как
+как
+какая
 какие
 каким
 каких
 какой
-касается
-качестве
-квартиры
-километров
-когда
+кем
 Когда
+когда
 кого
-количество
 команда
 команды
-комиссии
-комитета
-комментариев
-компания
+комната
 кому
-конечно
+конец
 Конечно
-конференции
+конечно
 конца
 конце
-коп
-корреспонденту
 которая
-которого
 которое
 которой
 котором
-которому
 которую
 которые
 который
 которым
-которыми
 которых
-края
 Кроме
 кроме
-крупных
+кругом
 кстате
 Кстати
 кстати
-кто
 Кто
+кто
 куда
 Л
-легко
 лет
-летний
 ли
 либо
-лидер
-лиц
-лица
-лично
+лицо
 лишь
-лучше
 любая
 любой
 людей
 люди
-людям
 М
 м
 мало
-марта
-массовой
 мая
-между
 Между
+между
 мене
 менее
 меньше
 меня
-мере
-меры
 места
-месте
-местных
 место
 месяц
 месяца
 месяцев
 метра
 метров
-миллиарда
 миллион
-миллиона
+мимо
 минут
+минута
+мира
 мире
-мировой
-Михаил
-мне
 Мне
-многие
+мне
 Многие
+многие
 многих
 много
+мной
+мною
 мог
 могла
 могли
+могу
 могут
-может
+мож
 Может
-можно
+может
+может быть
 Можно
+можно
+можхо
+мои
 мой
 момент
-мы
+мочь
+моя
+моё
 Мы
+мы
 Н
-на
-На
 НА
+На
+на
+наверху
 над
-надо
 назад
-наиболее
 найти
 наконец
 нам
-например
-народа
+нами
 нас
-находившегося
-находится
 начала
 начале
-начальник
-начальника
+начать
 наш
 наша
+наше
 нашего
 нашей
 наши
 наших
-не
-Не
 НЕ
-невозможно
+Не
+не
 него
 недавно
 недели
 неделю
 нее
 ней
-некоторого
-некоторые
-некоторых
-нельзя
 нем
 немало
 нему
-необходимо
-нескольких
-несколько
-несмотря
-нет
 Нет
-ни
+нет
+нею
+неё
 Ни
+ни
 нибудь
+ниже
 никак
 никаких
+никакой
 никогда
-Николай
 никто
+никуда
 ним
 ними
 них
 ничего
-но
+ничто
 Но
-нового
-новой
-новостей
-новые
-новый
-новых
+но
 ноября
 Ну
+ну
 нужно
+нужный
+нх
 ныне
-нынешнего
 Нью
-о
 О
-об
+о
 Об
+об
+оба
 области
 образом
 обычно
-один
 Один
+один
 одна
+однажды
 Однако
 однако
 одним
 одно
-одновременно
 одного
 одной
 одном
 одну
-оказалась
-оказались
-оказалось
-оказался
 около
 октября
-он
 Он
-она
+он
 Она
-они
+она
 Они
+они
 оно
-операции
-опыт
 опять
-органы
-основном
-особенно
-остается
-от
 От
+от
 ответ
-отдела
-отличие
-отношении
-отношения
+отец
+откуда
+отсюда
 очень
-очередной
 очередь
 П
-партия
 первая
 первого
 первой
@@ -537,149 +479,73 @@ www
 первых
 перед
 период
-письмо
-площади
-по
 По
+по
 поводу
 под
-подобная
-позиции
-пока
+подойди
+позже
+пойти
 Пока
-политики
-полностью
-положение
-полтора
-получил
-получили
-получить
-помощи
-помощь
-помощью
-понять
+пока
+пол
 пор
-порядке
-посколько
-поскольку
-после
+пора
 После
-последнее
-последние
-последний
-последних
-пост
-постоянно
-потом
+после
 Потом
+потом
 потому
 похоже
-почему
 Почему
-почта
+почему
 почти
 Поэтому
-поэтому
-права
 Правда
-правда
-правило
-право
-практически
-предприятий
-предприятия
-председателя
-представителей
-представители
 прежде
-прежнему
-премьер
-премьера
-Пресс
-пресс
-при
 При
-придется
-примерно
-примеру
-принять
-приходится
+при
 Причем
-пришлось
 про
-проблем
-проблема
-проблемы
-провести
-продукции
-проект
-производства
-производство
-произошло
-происходит
-прокуратуры
 просто
-против
-процента
-процентов
-процесс
-прошла
-прошлого
-прошлом
 прямо
 пути
 путь
 пятая
 пяти
+пятый
 пять
-работа
-работавшую
-работает
-работать
-работе
-работу
-равно
 раз
 раза
-развития
-разных
-района
-районе
+разве
 ранее
 раньше
-резко
-результате
 Рейтер
-речь
-решения
 решил
 решили
-рода
-роль
-руб
-рук
-руках
-руки
-руководителей
-руководитель
-руководство
+решить
 ряд
 рядом
-с
 С
+с
+с кем
 сам
 сама
 сами
+самим
+самими
+самих
+само
 самого
 самое
 самой
 самом
+самому
+саму
 самые
 самый
 самым
 самых
-сборной
-свет
 свое
 своего
 своей
@@ -691,219 +557,153 @@ www
 своих
 свой
 свою
-связи
-сделаем
 сделал
-сделать
 себе
 себя
-сегодня
 Сегодня
-сейчас
 Сейчас
-семьи
-сентября
-Сергей
-Сергея
-силу
-силы
-система
-системы
-ситуации
-ситуацию
-ситуация
+сейчас
+семь
+сидеть
 сих
 скажем
 сказал
-сказать
-сколько
 скорее
-следует
-слишком
-слова
-словам
-случае
-случай
-смерти
 снова
 со
 собой
-собственности
-событий
-события
-совершенно
+собою
 совсем
-создать
 сообща
-сообщил
-сообщили
-состоянии
-сотрудники
-сотрудников
-специалистов
-специалисты
 сразу
-среди
 Среди
-средств
-средства
+среди
 срок
-ссылка
 стал
 стала
 стали
 стало
 станет
-становится
 стате
 стать
-степени
 сто
 стоит
-столице
 столь
-столько
-сторону
-стороны
-суббота
-суда
 сумму
-сути
-существует
-счет
-считает
-считать
-считают
 т
-так
+та
 Так
+так
 такая
 также
 таки
 такие
-таким
 Таким
+таким
 таких
 такого
 такое
 такой
-там
 Там
+там
 ТАСС
+твои
+твой
+твоя
+твоё
 те
-театра
 тебе
-тем
+тебя
 Тем
-теперь
+тем
+теми
 Теперь
-территории
+теперь
 тех
-течение
-то
 То
-тогда
+то
+тобой
+тобою
 Тогда
+тогда
 того
 тоже
 той
-только
 Только
+только
 том
 тому
 тонн
 тот
-точки
-точнее
+тою
+третий
 трех
 три
-труда
 трудно
+ту
 туда
 тут
 ты
 тысяч
 тысячи
-у
 У
-удалось
+у
 уж
-уже
 Уже
-уровень
+уже
+уметь
 уровне
-условия
-условиях
-утверждает
-утверждают
-участие
-участников
-факт
-февраля
-фирм
-фирма
-фирмы
-фонда
-Фото
 х
-ходе
 хорошо
+хотел бы
+хотеть
 хоть
-хотя
 Хотя
-хочет
-целом
-центр
-центра
-центре
-цены
+хотя
+хочешь
 час
 часа
 часов
 части
-частности
 часто
 часть
 чаще
 чего
-человек
-человека
 чем
-через
+чему
 Через
+через
 четыре
-четырех
 числе
 число
 членов
-что
 Что
-чтобы
+что
+чтоб
 Чтобы
+чтобы
 чуть
+шестой
 шесть
-эта
 Эта
-эти
+эта
 Эти
+эти
 этим
+этими
 этих
-это
 Это
+это
 этого
 этой
 этом
 этому
-этот
 Этот
+этот
 эту
 Ю
-Юрий
-я
 Я
-являетесь
-является
+я
 явно
 якобы
-января
-ясно
+января
\ No newline at end of file
diff --git a/apps/common/src/python/mediawords/languages/ru/ru_stop_words_old.txt b/apps/common/src/python/mediawords/languages/ru/ru_stop_words_old.txt
new file mode 100644
index 0000000000..e4a59dda4c
--- /dev/null
+++ b/apps/common/src/python/mediawords/languages/ru/ru_stop_words_old.txt
@@ -0,0 +1,909 @@
+#
+# This is a "short" stop word list for the Russian language.
+#
+
+adriver
+amp
+bin
+cgi
+href
+html
+http
+link
+livejournal
+quot
+rnd
+sid
+style
+www
+а
+А
+августа
+акций
+Александр
+Александра
+Алексей
+Анатолий
+Андрей
+АО
+апрель
+апреля
+Ассошиэйтед
+Б
+без
+Без
+блог
+более
+Более
+больше
+большинство
+большой
+Борис
+будет
+будто
+будут
+бы
+бывшего
+бывший
+был
+была
+были
+было
+быстро
+быть
+в
+В
+вам
+вас
+ваш
+вдруг
+ведь
+Ведь
+века
+вести
+весь
+весьма
+взгляд
+взять
+виде
+видимо
+Виктор
+вице
+включая
+Владимира
+власть
+вместе
+вместо
+внимание
+вновь
+во
+Во
+вовсе
+воды
+возможно
+возможности
+возможность
+войск
+вокруг
+вообще
+вопрос
+вопросы
+воскресение
+вот
+Вот
+впервые
+вполне
+Впрочем
+времена
+времени
+время
+вроде
+вряд
+все
+Все
+всегда
+всего
+всей
+всем
+всему
+всех
+встречи
+всю
+вся
+всё
+вторая
+второй
+вы
+Вы
+выборах
+выше
+выяснилось
+г
+Г
+где
+глава
+главе
+главного
+главное
+главный
+главным
+главы
+го
+говорил
+говорит
+говорится
+говорить
+говоря
+говорят
+год
+года
+году
+годы
+город
+города
+городе
+градусов
+Грозном
+группа
+группы
+д
+Д
+Да
+да
+давно
+дает
+даже
+Даже
+дал
+далеко
+дальше
+данным
+дать
+два
+две
+движения
+двух
+действий
+действительно
+действия
+декабря
+дел
+дела
+делаем
+делам
+делать
+деле
+дело
+Дело
+делу
+день
+деньги
+десять
+деятельности
+деятельность
+директора
+для
+Для
+дней
+дни
+дня
+днях
+до
+До
+довольно
+документы
+долго
+должен
+должна
+должно
+должны
+дом
+дома
+доме
+достаточно
+друг
+друга
+другие
+другим
+других
+другое
+другой
+другом
+е
+Е
+его
+Его
+едва
+ее
+ей
+ему
+если
+Если
+естественно
+есть
+Есть
+еще
+Еще
+же
+женщин
+женщины
+жизни
+жизнь
+жителей
+жить
+за
+За
+завода
+закон
+зам
+заместитель
+затем
+заявил
+заявление
+здесь
+Здесь
+земли
+знает
+значит
+знаю
+знают
+зрения
+и
+И
+игры
+идет
+из
+Из
+Известий
+Известия
+Известиям
+известно
+или
+Иллюстрация
+им
+имеет
+имени
+именно
+Именно
+иметь
+имеют
+имя
+иначе
+интервью
+интересы
+информацию
+история
+ИТАР
+итоге
+их
+Их
+июля
+июня
+й
+к
+К
+каждая
+каждого
+каждый
+кажется
+как
+Как
+какие
+каким
+каких
+какой
+касается
+качестве
+квартиры
+километров
+когда
+Когда
+кого
+количество
+команда
+команды
+комиссии
+комитета
+комментариев
+компания
+кому
+конечно
+Конечно
+конференции
+конца
+конце
+коп
+корреспонденту
+которая
+которого
+которое
+которой
+котором
+которому
+которую
+которые
+который
+которым
+которыми
+которых
+края
+Кроме
+кроме
+крупных
+кстате
+Кстати
+кстати
+кто
+Кто
+куда
+Л
+легко
+лет
+летний
+ли
+либо
+лидер
+лиц
+лица
+лично
+лишь
+лучше
+любая
+любой
+людей
+люди
+людям
+М
+м
+мало
+марта
+массовой
+мая
+между
+Между
+мене
+менее
+меньше
+меня
+мере
+меры
+места
+месте
+местных
+место
+месяц
+месяца
+месяцев
+метра
+метров
+миллиарда
+миллион
+миллиона
+минут
+мире
+мировой
+Михаил
+мне
+Мне
+многие
+Многие
+многих
+много
+мог
+могла
+могли
+могут
+может
+Может
+можно
+Можно
+мой
+момент
+мы
+Мы
+Н
+на
+На
+НА
+над
+надо
+назад
+наиболее
+найти
+наконец
+нам
+например
+народа
+нас
+находившегося
+находится
+начала
+начале
+начальник
+начальника
+наш
+наша
+нашего
+нашей
+наши
+наших
+не
+Не
+НЕ
+невозможно
+него
+недавно
+недели
+неделю
+нее
+ней
+некоторого
+некоторые
+некоторых
+нельзя
+нем
+немало
+нему
+необходимо
+нескольких
+несколько
+несмотря
+нет
+Нет
+ни
+Ни
+нибудь
+никак
+никаких
+никогда
+Николай
+никто
+ним
+ними
+них
+ничего
+но
+Но
+нового
+новой
+новостей
+новые
+новый
+новых
+ноября
+Ну
+нужно
+ныне
+нынешнего
+Нью
+о
+О
+об
+Об
+области
+образом
+обычно
+один
+Один
+одна
+Однако
+однако
+одним
+одно
+одновременно
+одного
+одной
+одном
+одну
+оказалась
+оказались
+оказалось
+оказался
+около
+октября
+он
+Он
+она
+Она
+они
+Они
+оно
+операции
+опыт
+опять
+органы
+основном
+особенно
+остается
+от
+От
+ответ
+отдела
+отличие
+отношении
+отношения
+очень
+очередной
+очередь
+П
+партия
+первая
+первого
+первой
+первую
+первые
+первый
+первым
+первых
+перед
+период
+письмо
+площади
+по
+По
+поводу
+под
+подобная
+позиции
+пока
+Пока
+политики
+полностью
+положение
+полтора
+получил
+получили
+получить
+помощи
+помощь
+помощью
+понять
+пор
+порядке
+посколько
+поскольку
+после
+После
+последнее
+последние
+последний
+последних
+пост
+постоянно
+потом
+Потом
+потому
+похоже
+почему
+Почему
+почта
+почти
+Поэтому
+поэтому
+права
+Правда
+правда
+правило
+право
+практически
+предприятий
+предприятия
+председателя
+представителей
+представители
+прежде
+прежнему
+премьер
+премьера
+Пресс
+пресс
+при
+При
+придется
+примерно
+примеру
+принять
+приходится
+Причем
+пришлось
+про
+проблем
+проблема
+проблемы
+провести
+продукции
+проект
+производства
+производство
+произошло
+происходит
+прокуратуры
+просто
+против
+процента
+процентов
+процесс
+прошла
+прошлого
+прошлом
+прямо
+пути
+путь
+пятая
+пяти
+пять
+работа
+работавшую
+работает
+работать
+работе
+работу
+равно
+раз
+раза
+развития
+разных
+района
+районе
+ранее
+раньше
+резко
+результате
+Рейтер
+речь
+решения
+решил
+решили
+рода
+роль
+руб
+рук
+руках
+руки
+руководителей
+руководитель
+руководство
+ряд
+рядом
+с
+С
+сам
+сама
+сами
+самого
+самое
+самой
+самом
+самые
+самый
+самым
+самых
+сборной
+свет
+свое
+своего
+своей
+своем
+своему
+свои
+своим
+своими
+своих
+свой
+свою
+связи
+сделаем
+сделал
+сделать
+себе
+себя
+сегодня
+Сегодня
+сейчас
+Сейчас
+семьи
+сентября
+Сергей
+Сергея
+силу
+силы
+система
+системы
+ситуации
+ситуацию
+ситуация
+сих
+скажем
+сказал
+сказать
+сколько
+скорее
+следует
+слишком
+слова
+словам
+случае
+случай
+смерти
+снова
+со
+собой
+собственности
+событий
+события
+совершенно
+совсем
+создать
+сообща
+сообщил
+сообщили
+состоянии
+сотрудники
+сотрудников
+специалистов
+специалисты
+сразу
+среди
+Среди
+средств
+средства
+срок
+ссылка
+стал
+стала
+стали
+стало
+станет
+становится
+стате
+стать
+степени
+сто
+стоит
+столице
+столь
+столько
+сторону
+стороны
+суббота
+суда
+сумму
+сути
+существует
+счет
+считает
+считать
+считают
+т
+так
+Так
+такая
+также
+таки
+такие
+таким
+Таким
+таких
+такого
+такое
+такой
+там
+Там
+ТАСС
+те
+театра
+тебе
+тем
+Тем
+теперь
+Теперь
+территории
+тех
+течение
+то
+То
+тогда
+Тогда
+того
+тоже
+той
+только
+Только
+том
+тому
+тонн
+тот
+точки
+точнее
+трех
+три
+труда
+трудно
+туда
+тут
+ты
+тысяч
+тысячи
+у
+У
+удалось
+уж
+уже
+Уже
+уровень
+уровне
+условия
+условиях
+утверждает
+утверждают
+участие
+участников
+факт
+февраля
+фирм
+фирма
+фирмы
+фонда
+Фото
+х
+ходе
+хорошо
+хоть
+хотя
+Хотя
+хочет
+целом
+центр
+центра
+центре
+цены
+час
+часа
+часов
+части
+частности
+часто
+часть
+чаще
+чего
+человек
+человека
+чем
+через
+Через
+четыре
+четырех
+числе
+число
+членов
+что
+Что
+чтобы
+Чтобы
+чуть
+шесть
+эта
+Эта
+эти
+Эти
+этим
+этих
+это
+Это
+этого
+этой
+этом
+этому
+этот
+Этот
+эту
+Ю
+Юрий
+я
+Я
+являетесь
+является
+явно
+якобы
+января
+ясно
diff --git a/apps/common/src/python/mediawords/languages/sv/sv_stop_words.txt b/apps/common/src/python/mediawords/languages/sv/sv_stop_words.txt
index 0629e2deb2..5ad5904ccd 100644
--- a/apps/common/src/python/mediawords/languages/sv/sv_stop_words.txt
+++ b/apps/common/src/python/mediawords/languages/sv/sv_stop_words.txt
@@ -1,22 +1,42 @@
-#
 # This is a stop word list for the Swedish language.
 #
 # Sources:
 #     http://search.cpan.org/~creamyg/Lingua-StopWords-0.09/
+#     https://github.com/stopwords-iso/stopwords-sv/blob/master/stopwords-sv.txt
 #     that one Swedish journalist
-#
+# (Lightly edited to remove words in the original lists that are actually meaningful)
 
+aderton
+adertonde
+adjö
+aldrig
 alla
+allas
 allt
+alltid
+alltså
+andra
+andras
+annan
+annat
+artonde
+artonn
 att
 av
-blev
-bli
-blir
-blivit
+bara
+bland
 borde
+bort
+borta
 båda
+bådas
+dag
+dagar
+dagarna
+dagen
 de
+del
+delen
 dem
 den
 denna
@@ -29,125 +49,309 @@ detta
 dig
 din
 dina
+dit
 ditt
-dom
+dock
 du
 där
+därför
 då
+e
 efter
 eftersom
 egen
 ej
+elfte
 eller
+elva
+emot
 en
+enligt
+ens
 er
 era
+ers
 ert
 ett
+ettusen
 fanns
-finns
+fem
+femte
+femtio
+femtionde
+femton
+femtonde
+fick
+finnas
+fjorton
+fjortonde
+fjärde
+fler
+flera
+flesta
 från
+fyra
+fyrtio
+fyrtionde
 få
+får
+fått
+följande
 för
 före
 genom
+gick
 gjorde
 gjort
+god
+goda
+godare
+godast
+gälla
+gäller
+gällt
+gärna
+gå
+går
+gått
+gör
 göra
 ha
 hade
-hade
-han
+haft
 han
 hans
-hans
 har
+heller
+hellre
 hen
 henne
 hennes
+hit
 hon
 honom
+hundra
+hundraen
+hundraett
 hur
 här
+högst
 i
-i
+ibland
 icke
+idag
 igen
+igår
+imorgon
+in
+inför
+inga
 ingen
+ingenting
+inget
 innan
+inne
 inom
 inte
+inuti
+ja
 jag
+jo
 ju
+just
+jämfört
 kan
+kanske
+knappast
+kom
+komma
+kommer
+kommit
+kr
 kunde
+kunna
 kunnat
-lite
+kvar
+legat
+ligga
+ligger
 man
 med
+mej
 mellan
 men
+mer
+mera
+mest
 mig
 min
-min
 mina
 mitt
+mittemot
 mot
 mycket
+många
+måste
+möjlig
+möjligen
+möjligt
+möjligtvis
+nederst
+nej
+ner
 nere
 ni
+nio
+nionde
+nittio
+nittionde
+nitton
+nittonde
+nog
+noll
+nr
 nu
+nummer
 när
+nästa
 någon
+någonting
 något
 några
+nån
+nånting
+nåt
+nödvändig
+nödvändiga
+nödvändigt
+nödvändigtvis
 och
+också
+ofta
+oftast
+olika
+olikt
 om
-oss
 på
+rakt
+redan
+rätt
+sa
+sade
+sagt
 samma
 sedan
 sen
+senare
+senast
+sent
+sex
+sextio
+sextionde
+sexton
+sextonde
 sig
 sin
 sina
+sist
+sista
+siste
+sitt
 sitta
+sju
+sjunde
+sjuttio
+sjuttionde
+sjutton
+sjuttonde
 själv
+sjätte
+ska
+skall
 skulle
+slutligen
+snart
 som
+säga
+säger
 så
 sådan
 sådana
 sådant
 sån
-till
+ta
+tack
+tar
 till
 tills
+tio
+tionde
+tjugo
+tjugoen
+tjugoett
+tjugonde
+tjugotre
+tjugotvå
+tjungo
+tolfte
+tolv
+tre
+tredje
+trettio
+trettionde
+tretton
+trettonde
+två
+tvåhundra
 under
 upp
+ur
 ut
 utan
+utanför
 ute
+va
 vad
 var
 vara
 varför
+varifrån
 varit
 varje
+varken
 vars
+varsågod
 vart
 vem
+vems
+verkligen
 vi
 vid
+vidare
+viktigare
+viktigast
 vilka
 vilkas
 vilken
 vilket
-vår
+vill
+väl
+vänster
+vänstra
 våra
 vårat
 vårt
 än
+ännu
 är
+även
 åt
+åtminstone
+åtta
+åttio
+åttionde
+åttonde
 över
+övermorgon
+överst
+övre
\ No newline at end of file
diff --git a/apps/common/src/python/mediawords/languages/sv/sv_stop_words_old.txt b/apps/common/src/python/mediawords/languages/sv/sv_stop_words_old.txt
new file mode 100644
index 0000000000..0629e2deb2
--- /dev/null
+++ b/apps/common/src/python/mediawords/languages/sv/sv_stop_words_old.txt
@@ -0,0 +1,153 @@
+#
+# This is a stop word list for the Swedish language.
+#
+# Sources:
+#     http://search.cpan.org/~creamyg/Lingua-StopWords-0.09/
+#     that one Swedish journalist
+#
+
+alla
+allt
+att
+av
+blev
+bli
+blir
+blivit
+borde
+båda
+de
+dem
+den
+denna
+dens
+deras
+dess
+dessa
+det
+detta
+dig
+din
+dina
+ditt
+dom
+du
+där
+då
+efter
+eftersom
+egen
+ej
+eller
+en
+er
+era
+ert
+ett
+fanns
+finns
+från
+få
+för
+före
+genom
+gjorde
+gjort
+göra
+ha
+hade
+hade
+han
+han
+hans
+hans
+har
+hen
+henne
+hennes
+hon
+honom
+hur
+här
+i
+i
+icke
+igen
+ingen
+innan
+inom
+inte
+jag
+ju
+kan
+kunde
+kunnat
+lite
+man
+med
+mellan
+men
+mig
+min
+min
+mina
+mitt
+mot
+mycket
+nere
+ni
+nu
+när
+någon
+något
+några
+och
+om
+oss
+på
+samma
+sedan
+sen
+sig
+sin
+sina
+sitta
+själv
+skulle
+som
+så
+sådan
+sådana
+sådant
+sån
+till
+till
+tills
+under
+upp
+ut
+utan
+ute
+vad
+var
+vara
+varför
+varit
+varje
+vars
+vart
+vem
+vi
+vid
+vilka
+vilkas
+vilken
+vilket
+vår
+våra
+vårat
+vårt
+än
+är
+åt
+över
diff --git a/apps/common/src/python/mediawords/languages/tr/tr_stop_words.txt b/apps/common/src/python/mediawords/languages/tr/tr_stop_words.txt
old mode 100755
new mode 100644
index 2418be327c..4736179ead
--- a/apps/common/src/python/mediawords/languages/tr/tr_stop_words.txt
+++ b/apps/common/src/python/mediawords/languages/tr/tr_stop_words.txt
@@ -1,44 +1,71 @@
-#
-# This is a stop word list for the Turkish language.
-#
 # Sources:
+#
 #     http://nlp.ceng.fatih.edu.tr/blog/?p=101
 #     http://www.ranks.nl/stopwords/turkish.html
-#
+#     https://github.com/stopwords-iso/stopwords-tr/blob/master/stopwords-tr.txt
+# (Lightly edited to remove words in the original lists that are actually meaningful)
 
 a
-acaba
+acep
+adamakıllı
+adeta
+ait
 altmýþ
+altmış
 altý
 altı
 ama
+amma
+anca
 ancak
+arada
+artýk
 artık
 asla
 aslında
+aynen
+ayrıca
 az
+açıkçası
 b
 bana
+bari
 bazen
 bazý
 bazı
 bazıları
 bazısı
+başkası
+baţka
 belki
 ben
 benden
 beni
 benim
+beri
+beriki
 beþ
 beş
+beţ
+bilcümle
 bile
 bin
+binaen
+binaenaleyh
 bir
+biraz
+birazdan
+birbiri
+birden
+birdenbire
 biri
+birice
+birileri
 birisi
 birkaç
 birkaçı
 birkez
+birlikte
 birçok
 birçokları
 birçoğu
@@ -46,56 +73,145 @@ birþey
 birþeyi
 birşey
 birşeyi
+birţey
+bitevi
+biteviye
+bittabi
 biz
+bizatihi
+bizce
+bizcileyin
 bizden
 bize
 bizi
 bizim
+bizimki
+bizzat
 bu
 buna
 bunda
 bundan
+bunlar
+bunları
+bunların
 bunu
 bunun
+buracıkta
 burada
+buradan
+burası
 böyle
 böylece
+böylecene
+böylelikle
+böylemesine
+böylesine
+büsbütün
 bütün
 c
+cümlesi
 d
 da
 daha
 dahi
+dahil
+dahilen
+daima
+dair
+dayanarak
 de
 defa
+dek
 demek
+demin
+demincek
+deminden
+denli
+derakap
+derhal
+derken
+deđil
 değil
+değin
 diye
+diđer
 diğer
 diğeri
 diğerleri
 doksan
 dokuz
 dolayı
+dolayısıyla
+doğru
 dört
 e
+edecek
+eden
+ederek
+edilecek
+ediliyor
+edilmesi
+ediyor
+elbet
 elbette
 elli
+emme
 en
 en  gibi
+enikonu
+epey
+epeyce
+epeyi
+esasen
+esnasında
+etmesi
+etti
+ettiği
+ettiğini
+evleviyetle
+evvel
+evvela
+evvelce
+evvelden
+evvelemirde
+evveli
+eđer
+eğer
 f
 fakat
 falan
 felan
-filan
+filanca
 g
+gah
+gayet
+gayetle
+gayri
+gayrı
+gelgelelim
 gene
+gerek
+gerçi
+geçende
+geçenlerde
 gibi
+gibilerden
+gibisinden
+göre
 h
+hakeza
+halbuki
+halen
+halihazırda
+haliyle
+handiyse
 hangi
 hangisi
 hani
+hariç
+hasebiyle
 hatta
+hele
 hem
 henüz
 hep
@@ -104,32 +220,64 @@ hepsine
 hepsini
 her
 her biri
+herhangi
 herkes
 herkese
 herkesi
+herkesin
 hiç
 hiç kimse
+hiçbir
 hiçbiri
 hiçbirine
 hiçbirini
 hâlâ
 i
+iken
 iki
+ila
 ile
-INSERmi
+ilgili
+ilk
+illa
+illaki
+imdi
+indinde
+insermi
 ise
+ister
+itibaren
+itibariyle
+itibarıyla
+iyi
+iyice
+iyicene
 için
 içinde
-işte
+iţte
 j
 k
 kadar
+kah
+kala
+kanýmca
+karşın
 katrilyon
-kaç
+kaynak
+kaçı
 kendi
+kendilerine
 kendine
 kendini
+kendisi
+kendisine
+kendisini
+kere
 kez
+keza
+kezalik
+keşke
+keţke
 ki
 kim
 kimden
@@ -137,10 +285,26 @@ kime
 kimi
 kimin
 kimisi
+kimse
+kimsecik
+kimsecikler
+külliyen
 kýrk
+kýsaca
+kırk
+kısaca
 l
+lakin
+lütfen
 m
+maada
 madem
+mademki
+mamafih
+mebni
+meğer
+meğerki
+meğerse
 mi
 milyar
 milyon
@@ -151,39 +315,93 @@ mı
 n
 nasýl
 nasıl
+nasılsa
+nazaran
 ne
 ne kadar
 ne zaman
 neden
+nedeniyle
+nedenle
+nedense
 nedir
 nerde
+nerden
+nerdeyse
+nere
 nerede
 nereden
+neredeyse
+neresi
 nereye
 nesi
+netekim
+neye
+neyi
 neyse
+nice
+nihayet
+nihayetinde
+nitekim
 niye
 niçin
 o
+olan
+olarak
+oldu
+olduklarını
+oldukça
+olduğu
+olduğunu
+olmadı
+olmadığı
+olmak
+olması
+olmayan
+olmaz
+olsa
+olsun
+olup
+olur
+olursa
+oluyor
 on
 ona
+onca
+onculayın
+onda
 ondan
 onlar
 onlara
 onlardan
 onlari
 onlarýn
+onları
 onların
 onu
 onu otuz
 onun
+oracık
+oracıkta
 orada
+oradan
+oranca
+oranla
+oraya
+otuz
 oysa
 oysaki
 p
+pek
+pekala
+peki
+peyderpey
 r
 rağmen
 s
+sadece
+sahi
+sahiden
 sana
 sanki
 sekiz
@@ -199,44 +417,104 @@ sizi
 sizin
 son
 sonra
+sonradan
+sonraları
+sonunda
 t
 tabi
+tabii
+tam
 tamam
+tamamen
+tamamıyla
+tarafından
+tek
 trilyon
 tüm
 tümü
 u
 v
 var
+vardı
+vasıtasıyla
 ve
+velev
+velhasıl
+velhasılıkelam
 veya
 veyahut
 y
 ya
 ya da
+yahut
+yakinen
+yakında
+yakından
+yakınlarda
+yalnız
+yalnızca
 yani
+yapacak
+yapmak
+yaptı
+yaptıkları
+yaptığı
+yaptığını
+yapılan
+yapılması
+yapıyor
 yedi
+yeniden
+yenilerde
 yerine
 yetmiþ
+yetmiş
+yetmiţ
 yine
 yirmi
+yok
 yoksa
+yoluyla
 yüz
+yüzünden
 z
+zarfında
 zaten
+zati
 zira
 ç
+çabuk
+çabukça
+çeşitli
 çok
+çokları
+çoklarınca
+çokluk
+çoklukla
+çokça
 çoğu
+çoğun
 çoğuna
+çoğunca
+çoğunlukla
 çoğunu
 çünkü
 ö
+öbürkü
 öbürü
 ön
 önce
+önceden
+önceleri
+öncelikle
+öteki
+ötekisi
 ötürü
 öyle
+öylece
+öylelikle
+öylemesine
+öz
 ü
 üzere
 üç
@@ -261,9 +539,18 @@ zira
 şimdi
 şu
 şuna
+şuncacık
 şunda
 şundan
 şunlar
+şunları
 şunu
 şunun
+şura
+şuracıkta
+şurası
 şöyle
+ţayet
+ţimdi
+ţu
+ţöyle
\ No newline at end of file
diff --git a/apps/common/src/python/mediawords/languages/tr/tr_stop_words_old.txt b/apps/common/src/python/mediawords/languages/tr/tr_stop_words_old.txt
new file mode 100755
index 0000000000..2418be327c
--- /dev/null
+++ b/apps/common/src/python/mediawords/languages/tr/tr_stop_words_old.txt
@@ -0,0 +1,269 @@
+#
+# This is a stop word list for the Turkish language.
+#
+# Sources:
+#     http://nlp.ceng.fatih.edu.tr/blog/?p=101
+#     http://www.ranks.nl/stopwords/turkish.html
+#
+
+a
+acaba
+altmýþ
+altý
+altı
+ama
+ancak
+artık
+asla
+aslında
+az
+b
+bana
+bazen
+bazý
+bazı
+bazıları
+bazısı
+belki
+ben
+benden
+beni
+benim
+beþ
+beş
+bile
+bin
+bir
+biri
+birisi
+birkaç
+birkaçı
+birkez
+birçok
+birçokları
+birçoğu
+birþey
+birþeyi
+birşey
+birşeyi
+biz
+bizden
+bize
+bizi
+bizim
+bu
+buna
+bunda
+bundan
+bunu
+bunun
+burada
+böyle
+böylece
+bütün
+c
+d
+da
+daha
+dahi
+de
+defa
+demek
+değil
+diye
+diğer
+diğeri
+diğerleri
+doksan
+dokuz
+dolayı
+dört
+e
+elbette
+elli
+en
+en  gibi
+f
+fakat
+falan
+felan
+filan
+g
+gene
+gibi
+h
+hangi
+hangisi
+hani
+hatta
+hem
+henüz
+hep
+hepsi
+hepsine
+hepsini
+her
+her biri
+herkes
+herkese
+herkesi
+hiç
+hiç kimse
+hiçbiri
+hiçbirine
+hiçbirini
+hâlâ
+i
+iki
+ile
+INSERmi
+ise
+için
+içinde
+işte
+j
+k
+kadar
+katrilyon
+kaç
+kendi
+kendine
+kendini
+kez
+ki
+kim
+kimden
+kime
+kimi
+kimin
+kimisi
+kýrk
+l
+m
+madem
+mi
+milyar
+milyon
+mu
+mü
+mý
+mı
+n
+nasýl
+nasıl
+ne
+ne kadar
+ne zaman
+neden
+nedir
+nerde
+nerede
+nereden
+nereye
+nesi
+neyse
+niye
+niçin
+o
+on
+ona
+ondan
+onlar
+onlara
+onlardan
+onlari
+onlarýn
+onların
+onu
+onu otuz
+onun
+orada
+oysa
+oysaki
+p
+r
+rağmen
+s
+sana
+sanki
+sekiz
+seksen
+sen
+senden
+seni
+senin
+siz
+sizden
+size
+sizi
+sizin
+son
+sonra
+t
+tabi
+tamam
+trilyon
+tüm
+tümü
+u
+v
+var
+ve
+veya
+veyahut
+y
+ya
+ya da
+yani
+yedi
+yerine
+yetmiþ
+yine
+yirmi
+yoksa
+yüz
+z
+zaten
+zira
+ç
+çok
+çoğu
+çoğuna
+çoğunu
+çünkü
+ö
+öbürü
+ön
+önce
+ötürü
+öyle
+ü
+üzere
+üç
+þey
+þeyden
+þeyi
+þeyler
+þu
+þuna
+þunda
+þundan
+þunu
+ğ
+ı
+ş
+şayet
+şey
+şeyden
+şeye
+şeyi
+şeyler
+şimdi
+şu
+şuna
+şunda
+şundan
+şunlar
+şunu
+şunun
+şöyle
diff --git a/apps/common/src/python/mediawords/languages/zh/__init__.py b/apps/common/src/python/mediawords/languages/zh/__init__.py
index 909365b9dd..eb011d37be 100644
--- a/apps/common/src/python/mediawords/languages/zh/__init__.py
+++ b/apps/common/src/python/mediawords/languages/zh/__init__.py
@@ -32,6 +32,9 @@ class ChineseLanguage(StopWordsFromFileMixIn):
         # Stop words map
         '__stop_words_map',
 
+        # FIXME remove once stopword comparison is over
+        '__stop_words_old_map',
+
         # Jieba instance
         '__jieba',
 
diff --git a/apps/common/src/python/mediawords/languages/zh/zh_stop_words.txt b/apps/common/src/python/mediawords/languages/zh/zh_stop_words.txt
index 3eb0376f33..f80f970b44 100644
--- a/apps/common/src/python/mediawords/languages/zh/zh_stop_words.txt
+++ b/apps/common/src/python/mediawords/languages/zh/zh_stop_words.txt
@@ -1,10 +1,12 @@
-# Appended Traditional Chinese characters (Note: This does not include all stopwords in Cantonese or Taiwanese Mandarin)
 # Sources:
 # http://blog.csdn.net/shijiebei2009/article/details/39696571
 # http://github.com/stopwords-iso/stopwords-zh
+#
+# (Lightly edited to remove words in the original lists that are actually meaningful)
+# Appended Traditional Chinese characters (Note: This does not include all stopwords in Cantonese or Taiwanese Mandarin)
+
 !
 "
-#
 $
 %
 &
@@ -129,38 +131,6 @@ sup
 一
 一.
 一一
-一下
-一个
-一些
-一何
-一來
-一個
-一切
-一则
-一则通过
-一則
-一則通過
-一天
-一定
-一方面
-一旦
-一时
-一時
-一来
-一样
-一樣
-一次
-一片
-一番
-一直
-一致
-一般
-一起
-一轉眼
-一转眼
-一边
-一邊
-一面
 七
 万一
 三
@@ -204,8 +174,6 @@ sup
 不光
 不免
 不再
-不力
-不勝
 不单
 不变
 不只
@@ -224,8 +192,6 @@ sup
 不如
 不妨
 不定
-不对
-不對
 不少
 不尽
 不尽然
@@ -239,12 +205,8 @@ sup
 不必
 不怎么
 不怎麼
-不怕
 不惟
-不成
 不拘
-不择手段
-不擇手段
 不敢
 不料
 不断
@@ -259,8 +221,6 @@ sup
 不止一次
 不比
 不消
-不满
-不滿
 不然
 不然的話
 不然的话
@@ -277,7 +237,6 @@ sup
 不管怎樣
 不經意
 不经意
-不胜
 不能
 不能不
 不至于
@@ -306,8 +265,6 @@ sup
 且說
 且说
 两者
-严格
-严重
 並
 並不
 並不是
@@ -325,7 +282,6 @@ sup
 中小
 中間
 中间
-丰富
 串行
 临
 临到
@@ -341,7 +297,6 @@ sup
 主张
 主張
 主要
-举凡
 举行
 乃
 乃至
@@ -355,20 +310,12 @@ sup
 之後
 之所以
 之类
-之類
-乌乎
-乎
-乒
 乘
-乘势
 乘勝
 乘勢
 乘机
 乘機
 乘胜
-乘虚
-乘虛
-乘隙
 九
 也
 也好
@@ -397,7 +344,6 @@ sup
 互相
 五
 些
-交口
 亦
 产生
 亲口
@@ -437,9 +383,6 @@ sup
 从中
 从事
 从今以后
-从优
-从古到今
-从古至今
 从头
 从宽
 从小
@@ -491,17 +434,12 @@ sup
 任憑
 企图
 企圖
-伙同
 会
-伟大
 传
-传说
-传闻
 似乎
 似的
 但
 但凡
-但愿
 但是
 但願
 何
@@ -550,16 +488,9 @@ sup
 依據
 依照
 依靠
-便
 便于
 便於
 係
-促进
-促進
-保持
-保管
-保险
-保險
 俺
 俺们
 俺們
@@ -584,7 +515,6 @@ sup
 假使
 假如
 假若
-偉大
 偏偏
 做到
 偶尔
@@ -592,8 +522,6 @@ sup
 偶而
 傥然
 傳
-傳聞
-傳說
 僅
 僅僅
 像
@@ -633,7 +561,6 @@ sup
 八成
 公然
 六
-兮
 共
 共同
 共总
@@ -676,8 +603,6 @@ sup
 再說
 再説
 再说
-冒
-冲
 决不
 决定
 决非
@@ -716,6 +641,7 @@ sup
 切莫
 则
 则甚
+则通过
 刚
 刚好
 刚巧
@@ -750,6 +676,7 @@ sup
 到頭來
 則
 則甚
+則通過
 前后
 前後
 前此
@@ -765,14 +692,8 @@ sup
 加之
 加以
 加入
-加強
-加强
 动不动
-动辄
-勃然
 動不動
-動輒
-匆匆
 十分
 千
 千万
@@ -831,8 +752,6 @@ sup
 取道
 受到
 变成
-古來
-古来
 另
 另一个
 另一個
@@ -843,7 +762,6 @@ sup
 另行
 只
 只当
-只怕
 只是
 只有
 只消
@@ -854,12 +772,8 @@ sup
 叫做
 召开
 召開
-叮咚
-叮噹
-叮当
 可
 可以
-可好
 可是
 可能
 可見
@@ -898,14 +812,11 @@ sup
 吧
 吧哒
 吧噠
-吱
 吶
 呀
 呃
 呆呆地
 呐
-呕
-呗
 呜
 呜呼
 呢
@@ -913,29 +824,19 @@ sup
 周圍
 呵
 呵呵
-呸
-呼哧
-呼啦
 咁
 咋
 和
-咚
-咦
 咧
 咱
 咱们
 咱們
-咳
 哇
 哈
 哈哈
 哉
 哎
-哎呀
-哎哟
 哎喲
-哗
-哗啦
 哟
 哦
 哩
@@ -957,7 +858,6 @@ sup
 哪里
 哼
 哼唷
-唄
 唉
 唔
 唯有
@@ -966,14 +866,10 @@ sup
 啊哈
 啊哟
 啊喲
-問題
-啐
 啥
 啦
 啪达
 啪達
-啷噹
-啷当
 喀
 喂
 喏
@@ -987,28 +883,16 @@ sup
 嗚
 嗚呼
 嗡
-嗡嗡
 嗬
 嗯
-嗳
-嘅
 嘍
 嘎
-嘎嘎
 嘎登
-嘔
-嘘
 嘛
 嘩
 嘩啦
-嘻
 嘿
 嘿嘿
-噓
-噯
-嚇
-嚴格
-嚴重
 四
 因
 因为
@@ -1019,7 +903,6 @@ sup
 因着
 因而
 因著
-固
 固然
 在
 在下
@@ -1040,7 +923,6 @@ sup
 处在
 处处
 处理
-复杂
 多
 多么
 多亏
@@ -1060,20 +942,13 @@ sup
 夠瞧的
 夥同
 大
-大不了
-大举
-大事
 大体
 大体上
-大凡
-大力
 大多
 大多数
 大多數
 大大
 大家
-大张旗鼓
-大張旗鼓
 大批
 大抵
 大概
@@ -1081,24 +956,16 @@ sup
 大約
 大约
 大致
-大舉
 大都
 大量
-大面儿上
-大面兒上
 大體
 大體上
-失去
-奇
-奈
-奋勇
-奮勇
+天
 她
 她们
 她們
 她是
 她的
-好
 好像
 好在
 好的
@@ -1137,14 +1004,9 @@ sup
 它們的
 它是
 它的
-安全
-完全
 完成
 定
-实现
 实际
-宣布
-容易
 密切
 實現
 實際
@@ -1152,22 +1014,17 @@ sup
 寧可
 寧肯
 寧願
-对
 对于
 对应
 对待
 对方
 对比
 将
-将才
 将要
 将近
 將
-將才
 將要
 將近
-專門
-對
 對待
 對應
 對方
@@ -1221,7 +1078,6 @@ sup
 岂但
 岂止
 岂非
-川流不息
 左右
 巨大
 巩固
@@ -1234,7 +1090,6 @@ sup
 已經
 已经
 巴
-巴巴
 带
 帮助
 帶
@@ -1266,26 +1121,17 @@ sup
 幾時
 幾番
 幾經
-广大
 广泛
 应当
 应用
 应该
-庶乎
-庶几
-庶幾
 廣大
 廣泛
 开外
 开始
 开展
 引起
-弗
-強烈
-強調
 弹指之间
-强烈
-强调
 彈指之間
 归
 归根到底
@@ -1300,8 +1146,6 @@ sup
 当口儿
 当地
 当场
-当头
-当庭
 当时
 当然
 当真
@@ -1329,27 +1173,16 @@ sup
 得了
 得出
 得到
-得天独厚
-得天獨厚
-得起
 從
 從不
 從中
 從事
 從今以後
 從來
-從優
-從古到今
-從古至今
-從嚴
-從寬
-從小
-從新
 從早到晚
 從未
 從此
 從此以後
-從無到有
 從而
 從輕
 從速
@@ -1363,8 +1196,6 @@ sup
 必定
 必将
 必將
-必然
-必要
 必須
 必须
 快
@@ -1375,16 +1206,12 @@ sup
 怎么
 怎么办
 怎么样
-怎奈
 怎样
 怎樣
 怎麼
 怎麼樣
 怎麼辦
 怎麽
-怕
-急匆匆
-怪
 怪不得
 总之
 总是
@@ -1394,7 +1221,6 @@ sup
 总结
 总而言之
 恍然
-恐怕
 恰似
 恰好
 恰如
@@ -1407,23 +1233,15 @@ sup
 您們
 您是
 惟其
-惯常
 意思
-愤然
 愿意
 慢說
 慢説
 慢说
 慣常
-憑
-憑藉
-憤然
-應用
 應當
 應該
 成为
-成年
-成年累月
 成心
 成為
 我
@@ -1440,7 +1258,6 @@ sup
 或者
 或許
 或许
-战斗
 截然
 截至
 戰鬥
@@ -1451,20 +1268,10 @@ sup
 所有
 所謂
 所谓
-才
-才能
-扑通
-打
-打从
-打开天窗说亮话
-打從
-打開天窗說亮話
-扩大
 把
 抑或
 报导
 报道
-抽冷子
 拦腰
 拿
 指
@@ -1472,7 +1279,6 @@ sup
 按
 按时
 按時
-按期
 按照
 按理
 按說
@@ -1508,7 +1314,6 @@ sup
 換句話說
 換句話説
 換言之
-撲通
 據
 據實
 據悉
@@ -1516,7 +1321,6 @@ sup
 據此
 據稱
 據說
-擴大
 攔腰
 放量
 故
@@ -1538,7 +1342,6 @@ sup
 方
 方便
 方才
-方能
 方面
 於
 於是
@@ -1560,11 +1363,9 @@ sup
 日漸
 日益
 日臻
-日見
-日见
+旦
 时
 时候
-昂然
 明显
 明确
 明確
@@ -1578,8 +1379,6 @@ sup
 显著
 時
 時候
-普通
-普遍
 暗中
 暗地裡
 暗地里
@@ -1594,21 +1393,17 @@ sup
 曾經
 曾经
 替
-替代
 最
 最后
 最大
 最好
 最後
 最近
-最高
 會
 月
 有
 有些
 有关
-有利
-有力
 有及
 有所
 有效
@@ -1645,10 +1440,6 @@ sup
 极为
 极了
 极其
-极力
-极大
-极度
-极端
 构成
 果然
 果真
@@ -1657,32 +1448,29 @@ sup
 某些
 某個
 某某
+样
 根据
 根據
 根本
 格外
-梆
-極
 極了
 極其
 極力
-極大
 極度
 極為
 極端
 概
 構成
+樣
 權時
+次
 次第
 欢迎
 欤
 歟
-歡迎
-正值
 正在
 正如
 正巧
-正常
 正是
 此
 此中
@@ -1701,7 +1489,6 @@ sup
 歸
 歸根到底
 歸根結底
-歸齊
 殆
 毋宁
 毋寧
@@ -1734,7 +1521,6 @@ sup
 毫無
 毫無例外
 毫無保留地
-汝
 決不
 決定
 決非
@@ -1743,22 +1529,14 @@ sup
 沒有
 沙沙
 没
-没奈何
 没有
 沿
 沿着
 沿著
 況且
 注意
-活
-深入
-清楚
 湊巧
 準備
-满
-满足
-滿
-滿足
 漫說
 漫説
 漫说
@@ -1771,8 +1549,6 @@ sup
 為止
 為此
 為著
-烏乎
-焉
 無
 無寧
 無法
@@ -1797,18 +1573,14 @@ sup
 爾後
 爾爾
 爾等
+片
 牢牢
 特別是
 特别是
-特殊
 特点
-特約
-特约
 特點
-犹且
 犹自
 独
-独媒特约
 独自
 猛然
 猛然間
@@ -1816,15 +1588,12 @@ sup
 猶且
 猶自
 獨
-獨媒特約
 獨自
 獲得
 率尔
 率然
 率爾
-现代
 现在
-現代
 現在
 理应
 理当
@@ -1862,6 +1631,7 @@ sup
 略加
 略微
 略為
+番
 當
 當下
 當中
@@ -1869,7 +1639,6 @@ sup
 當前
 當即
 當口兒
-當地
 當場
 當庭
 當時
@@ -1877,8 +1646,6 @@ sup
 當真
 當着
 當著
-當頭
-白
 白白
 的
 的确
@@ -1886,7 +1653,6 @@ sup
 的話
 的话
 皆可
-盡
 盡可能
 盡如人意
 盡心盡力
@@ -1895,6 +1661,7 @@ sup
 盡然
 盡量
 目前
+直
 直到
 直接
 相似
@@ -1937,8 +1704,6 @@ sup
 确定
 碰巧
 確定
-社会主义
-社會主義
 离
 种
 积极
@@ -1949,12 +1714,8 @@ sup
 稱
 積極
 究竟
-穷年累月
 突出
 突然
-窃
-窮年累月
-竊
 立
 立刻
 立即
@@ -2030,7 +1791,6 @@ sup
 纵令
 纵使
 纵然
-练习
 组成
 经
 经常
@@ -2044,7 +1804,6 @@ sup
 绝非
 绝顶
 继之
-继后
 继续
 继而
 维持
@@ -2052,8 +1811,6 @@ sup
 缕缕
 罢了
 罷了
-老
-老大
 老是
 老老实实
 老老實實
@@ -2074,11 +1831,6 @@ sup
 而論
 而论
 联系
-联袂
-聯繫
-聯袂
-背地裡
-背地里
 背靠背
 能
 能否
@@ -2098,7 +1850,6 @@ sup
 自己
 自後
 自從
-自打
 自身
 臭
 至
@@ -2112,17 +1863,14 @@ sup
 與其說
 與否
 與此同時
-舉凡
 舉行
+般
 般的
 良好
 若
-若夫
 若是
-若果
 若非
 范围
-莫
 莫不
 莫不然
 莫如
@@ -2147,12 +1895,10 @@ sup
 行动
 行動
 行為
-衝
 表明
 表示
 被
 裡面
-複雜
 要
 要不
 要不是
@@ -2178,7 +1924,6 @@ sup
 許多
 話說
 該
-該當
 認為
 認爲
 認真
@@ -2195,18 +1940,13 @@ sup
 誰知
 請勿
 論
-論說
 諸
 諸位
 諸如
 謹
-譬喻
 譬如
 變成
 讓
-认为
-认真
-认识
 让
 许多
 论
@@ -2237,12 +1977,9 @@ sup
 豈但
 豈止
 豈非
-豐富
-賊死
 賴以
 贼死
 赖以
-赶
 赶快
 赶早不赶晚
 起
@@ -2255,10 +1992,6 @@ sup
 起见
 起頭
 起首
-趁
-趁便
-趁势
-趁勢
 趁早
 趁机
 趁機
@@ -2267,8 +2000,6 @@ sup
 趁着
 趁著
 越是
-趕
-趕快
 趕早不趕晚
 距
 跟
@@ -2278,23 +2009,15 @@ sup
 較之
 較比
 較為
-轉動
-轉變
-轉貼
-轟然
-转动
-转变
-转贴
-轰然
+轉眼
+转眼
 较
 较为
 较之
 较比
 边
 达到
-达旦
 迄
-迅速
 过
 过于
 过去
@@ -2334,7 +2057,6 @@ sup
 进入
 进去
 进来
-进步
 进而
 进行
 连
@@ -2400,8 +2122,6 @@ sup
 過去
 過於
 達到
-達旦
-適應
 適用
 適當
 遭到
@@ -2468,7 +2188,6 @@ sup
 關於
 问题
 间或
-防止
 阿
 附近
 陈年
@@ -2487,7 +2206,6 @@ sup
 除此而外
 除開
 除非
-陳年
 随
 随后
 随时
@@ -2504,7 +2222,6 @@ sup
 难说
 难道
 难道说
-集中
 雖
 雖則
 雖然
@@ -2517,24 +2234,21 @@ sup
 難說
 難道
 難道說
-雲爾
 零
 需要
 非但
 非常
 非徒
 非得
-非特
 非独
 非獨
 靠
-鞏固
+面
 頂多
 頃
 頃刻
 頃刻之間
 頃刻間
-順
 順着
 順著
 頓時
@@ -2555,12 +2269,10 @@ sup
 風雨無阻
 风雨无阻
 飽
-餘外
 餵
 饱
 首先
 馬上
-騰
 马上
 高低
 高兴
@@ -2568,8 +2280,6 @@ sup
 麼
 默然
 默默地
-齊
-齐
 ︿
 ！
 ＃
diff --git a/apps/common/src/python/mediawords/languages/zh/zh_stop_words_old.txt b/apps/common/src/python/mediawords/languages/zh/zh_stop_words_old.txt
new file mode 100644
index 0000000000..3eb0376f33
--- /dev/null
+++ b/apps/common/src/python/mediawords/languages/zh/zh_stop_words_old.txt
@@ -0,0 +1,2727 @@
+# Appended Traditional Chinese characters (Note: This does not include all stopwords in Cantonese or Taiwanese Mandarin)
+# Sources:
+# http://blog.csdn.net/shijiebei2009/article/details/39696571
+# http://github.com/stopwords-iso/stopwords-zh
+!
+"
+#
+$
+%
+&
+(
+)
+*
++
+,
+-
+--
+.
+..
+...
+......
+...................
+./
+.一
+.数
+.數
+.日
+/
+//
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+:
+://
+::
+;
+<
+=
+>
+>>
+?
+@
+[
+\
+]
+^
+_
+`
+A
+exp
+Lex
+sub
+sup
+|
+}
+~
+~~~~
+·
+×
+×××
+γ
+Δ
+μ
+φ
+φ．
+Ψ
+В
+—
+——
+———
+‘
+’
+’‘
+“
+”
+”，
+…
+……
+…………………………………………………③
+′∈
+′｜
+℃
+Ⅲ
+↑
+→
+∈［
+∪φ∈
+≈
+①
+②
+②ｃ
+③
+③］
+④
+⑤
+⑥
+⑦
+⑧
+⑨
+⑩
+──
+■
+▲
+　
+、
+。
+〈
+〉
+《
+》
+》），
+「
+」
+『
+』
+【
+】
+〔
+〕
+〕〔
+㈧
+一
+一.
+一一
+一下
+一个
+一些
+一何
+一來
+一個
+一切
+一则
+一则通过
+一則
+一則通過
+一天
+一定
+一方面
+一旦
+一时
+一時
+一来
+一样
+一樣
+一次
+一片
+一番
+一直
+一致
+一般
+一起
+一轉眼
+一转眼
+一边
+一邊
+一面
+七
+万一
+三
+三天两头
+三天兩頭
+三番两次
+三番五次
+三番兩次
+上
+上下
+上來
+上升
+上去
+上来
+上述
+上面
+下
+下來
+下列
+下去
+下来
+下面
+不
+不一
+不下
+不久
+不了
+不亦乐乎
+不亦樂乎
+不仅
+不仅...而且
+不仅仅
+不仅仅是
+不会
+不但
+不但...而且
+不僅
+不僅...而且
+不僅僅
+不僅僅是
+不光
+不免
+不再
+不力
+不勝
+不单
+不变
+不只
+不可
+不可开交
+不可抗拒
+不可開交
+不同
+不問
+不單
+不外
+不外乎
+不够
+不夠
+不大
+不如
+不妨
+不定
+不对
+不對
+不少
+不尽
+不尽然
+不巧
+不已
+不常
+不得
+不得不
+不得了
+不得已
+不必
+不怎么
+不怎麼
+不怕
+不惟
+不成
+不拘
+不择手段
+不擇手段
+不敢
+不料
+不断
+不斷
+不日
+不时
+不是
+不時
+不曾
+不會
+不止
+不止一次
+不比
+不消
+不满
+不滿
+不然
+不然的話
+不然的话
+不特
+不独
+不獨
+不由得
+不盡
+不盡然
+不知不覺
+不知不觉
+不管
+不管怎样
+不管怎樣
+不經意
+不经意
+不胜
+不能
+不能不
+不至于
+不至於
+不若
+不要
+不論
+不變
+不论
+不起
+不足
+不过
+不迭
+不過
+不问
+不限
+与
+与其
+与其说
+与否
+与此同时
+专门
+且
+且不說
+且不说
+且說
+且说
+两者
+严格
+严重
+並
+並不
+並不是
+並且
+並排
+並沒
+並沒有
+並無
+並肩
+並非
+个
+个人
+个别
+中
+中小
+中間
+中间
+丰富
+串行
+临
+临到
+为
+为主
+为了
+为什么
+为什麽
+为何
+为止
+为此
+为着
+主张
+主張
+主要
+举凡
+举行
+乃
+乃至
+乃至于
+乃至於
+么
+之
+之一
+之前
+之后
+之後
+之所以
+之类
+之類
+乌乎
+乎
+乒
+乘
+乘势
+乘勝
+乘勢
+乘机
+乘機
+乘胜
+乘虚
+乘虛
+乘隙
+九
+也
+也好
+也就是說
+也就是说
+也是
+也罢
+也罷
+了
+了解
+争取
+二
+二來
+二来
+二話不說
+二話沒說
+二话不说
+二话没说
+于
+于是
+于是乎
+云云
+云尔
+云爾
+互
+互相
+五
+些
+交口
+亦
+产生
+亲口
+亲手
+亲眼
+亲自
+亲身
+人
+人人
+人们
+人們
+人家
+人民
+什么
+什么样
+什麼
+什麼樣
+什麽
+仅
+仅仅
+今
+今后
+今天
+今年
+今後
+今日
+今次
+介于
+介於
+仍
+仍旧
+仍然
+仍舊
+从
+从不
+从严
+从中
+从事
+从今以后
+从优
+从古到今
+从古至今
+从头
+从宽
+从小
+从新
+从无到有
+从早到晚
+从未
+从来
+从此
+从此以后
+从而
+从轻
+从速
+从重
+他
+他人
+他们
+他們
+他是
+他的
+代替
+令
+以
+以上
+以下
+以为
+以來
+以便
+以免
+以前
+以及
+以后
+以外
+以後
+以故
+以期
+以来
+以為
+以至
+以至于
+以至於
+以致
+们
+任
+任何
+任凭
+任务
+任務
+任憑
+企图
+企圖
+伙同
+会
+伟大
+传
+传说
+传闻
+似乎
+似的
+但
+但凡
+但愿
+但是
+但願
+何
+何乐而不为
+何以
+何况
+何嘗
+何处
+何妨
+何尝
+何必
+何时
+何時
+何樂而不為
+何止
+何況
+何苦
+何處
+何須
+何须
+余外
+作为
+作為
+作爲
+你
+你们
+你們
+你是
+你的
+佢
+使
+使得
+使用
+來
+來不及
+來得及
+來看
+來着
+來自
+來著
+來說
+來講
+例如
+依
+依据
+依據
+依照
+依靠
+便
+便于
+便於
+係
+促进
+促進
+保持
+保管
+保险
+保險
+俺
+俺们
+俺們
+個
+個人
+個別
+倍加
+倍感
+們
+倒不如
+倒不如說
+倒不如说
+倒是
+倘
+倘使
+倘或
+倘然
+倘若
+借
+借以
+借此
+假使
+假如
+假若
+偉大
+偏偏
+做到
+偶尔
+偶爾
+偶而
+傥然
+傳
+傳聞
+傳說
+僅
+僅僅
+像
+儘
+儘早
+儘管
+儘管如此
+儻然
+儿
+允許
+允许
+元／吨
+元／噸
+充其极
+充其極
+充其量
+充分
+先不先
+先后
+先後
+先生
+光
+光是
+兒
+內
+全体
+全力
+全年
+全然
+全身心
+全部
+全都
+全面
+全體
+兩者
+八
+八成
+公然
+六
+兮
+共
+共同
+共总
+共總
+关于
+其
+其一
+其中
+其二
+其他
+其余
+其后
+其它
+其实
+其實
+其後
+其次
+其餘
+具体
+具体地说
+具体来说
+具体说来
+具有
+具體
+具體來說
+具體來説
+具體地說
+具體說來
+兼之
+内
+再
+再其次
+再则
+再則
+再有
+再次
+再者
+再者說
+再者说
+再說
+再説
+再说
+冒
+冲
+决不
+决定
+决非
+况且
+准备
+凑巧
+凝神
+几
+几乎
+几度
+几时
+几番
+几经
+凡
+凡是
+凭
+凭借
+出
+出于
+出來
+出去
+出於
+出来
+出现
+出現
+分別
+分别
+分头
+分期
+分期分批
+分頭
+切
+切不可
+切切
+切勿
+切莫
+则
+则甚
+刚
+刚好
+刚巧
+刚才
+初
+別
+別人
+別是
+別的
+別管
+別處
+別說
+別説
+别
+别人
+别处
+别是
+别的
+别管
+别说
+到
+到了儿
+到了兒
+到处
+到头
+到头来
+到底
+到目前为止
+到目前為止
+到處
+到頭
+到頭來
+則
+則甚
+前后
+前後
+前此
+前者
+前进
+前進
+前面
+剛
+剛好
+剛巧
+剛才
+加上
+加之
+加以
+加入
+加強
+加强
+动不动
+动辄
+勃然
+動不動
+動輒
+匆匆
+十分
+千
+千万
+千万千万
+千萬
+千萬千萬
+半
+单
+单单
+单纯
+即
+即令
+即使
+即便
+即刻
+即如
+即将
+即將
+即或
+即是說
+即是说
+即若
+却
+却不
+卻
+卻不
+历
+原來
+原来
+去
+又
+又及
+及
+及其
+及时
+及時
+及至
+双方
+反之
+反之亦然
+反之则
+反之則
+反倒
+反倒是
+反应
+反應
+反手
+反映
+反而
+反过来
+反过来说
+反過來
+反過來說
+反過來説
+取得
+取道
+受到
+变成
+古來
+古来
+另
+另一个
+另一個
+另一方面
+另外
+另悉
+另方面
+另行
+只
+只当
+只怕
+只是
+只有
+只消
+只當
+只要
+只限
+叫
+叫做
+召开
+召開
+叮咚
+叮噹
+叮当
+可
+可以
+可好
+可是
+可能
+可見
+可见
+各
+各个
+各人
+各位
+各個
+各地
+各式
+各种
+各種
+各級
+各级
+各自
+合理
+同
+同一
+同时
+同時
+同样
+同樣
+后
+后来
+后者
+后面
+向
+向使
+向着
+向著
+吓
+吗
+否则
+否則
+吧
+吧哒
+吧噠
+吱
+吶
+呀
+呃
+呆呆地
+呐
+呕
+呗
+呜
+呜呼
+呢
+周围
+周圍
+呵
+呵呵
+呸
+呼哧
+呼啦
+咁
+咋
+和
+咚
+咦
+咧
+咱
+咱们
+咱們
+咳
+哇
+哈
+哈哈
+哉
+哎
+哎呀
+哎哟
+哎喲
+哗
+哗啦
+哟
+哦
+哩
+哪
+哪个
+哪些
+哪個
+哪儿
+哪兒
+哪天
+哪年
+哪怕
+哪样
+哪樣
+哪裏
+哪裡
+哪边
+哪邊
+哪里
+哼
+哼唷
+唄
+唉
+唔
+唯有
+啊
+啊呀
+啊哈
+啊哟
+啊喲
+問題
+啐
+啥
+啦
+啪达
+啪達
+啷噹
+啷当
+喀
+喂
+喏
+喔唷
+單
+單單
+單純
+喲
+喽
+嗎
+嗚
+嗚呼
+嗡
+嗡嗡
+嗬
+嗯
+嗳
+嘅
+嘍
+嘎
+嘎嘎
+嘎登
+嘔
+嘘
+嘛
+嘩
+嘩啦
+嘻
+嘿
+嘿嘿
+噓
+噯
+嚇
+嚴格
+嚴重
+四
+因
+因为
+因了
+因此
+因為
+因爲
+因着
+因而
+因著
+固
+固然
+在
+在下
+在于
+在於
+地
+均
+坚决
+坚持
+基于
+基於
+基本
+基本上
+堅持
+堅決
+報導
+報道
+处在
+处处
+处理
+复杂
+多
+多么
+多亏
+多多
+多多少少
+多多益善
+多少
+多年來
+多年前
+多年来
+多数
+多數
+多次
+多虧
+多麼
+够瞧的
+夠瞧的
+夥同
+大
+大不了
+大举
+大事
+大体
+大体上
+大凡
+大力
+大多
+大多数
+大多數
+大大
+大家
+大张旗鼓
+大張旗鼓
+大批
+大抵
+大概
+大略
+大約
+大约
+大致
+大舉
+大都
+大量
+大面儿上
+大面兒上
+大體
+大體上
+失去
+奇
+奈
+奋勇
+奮勇
+她
+她们
+她們
+她是
+她的
+好
+好像
+好在
+好的
+好象
+如
+如上
+如上所述
+如下
+如今
+如何
+如其
+如前所述
+如同
+如常
+如是
+如期
+如果
+如次
+如此
+如此等等
+如若
+始而
+姑且
+存在
+存心
+孰料
+孰知
+宁
+宁可
+宁愿
+宁肯
+它
+它们
+它们的
+它們
+它們的
+它是
+它的
+安全
+完全
+完成
+定
+实现
+实际
+宣布
+容易
+密切
+實現
+實際
+寧
+寧可
+寧肯
+寧願
+对
+对于
+对应
+对待
+对方
+对比
+将
+将才
+将要
+将近
+將
+將才
+將要
+將近
+專門
+對
+對待
+對應
+對方
+對於
+對比
+小
+少数
+少數
+尔
+尔后
+尔尔
+尔等
+尚且
+尤其
+就
+就地
+就是
+就是了
+就是說
+就是説
+就是说
+就此
+就算
+就要
+尽
+尽可能
+尽如人意
+尽心尽力
+尽心竭力
+尽快
+尽早
+尽然
+尽管
+尽管如此
+尽量
+局外
+居然
+屆時
+届时
+属于
+屡
+屡屡
+屡次
+屡次三番
+屢
+屢屢
+屢次
+屢次三番
+屬於
+岂
+岂但
+岂止
+岂非
+川流不息
+左右
+巨大
+巩固
+差一点
+差一點
+差不多
+己
+已
+已矣
+已經
+已经
+巴
+巴巴
+带
+帮助
+帶
+常
+常常
+常言說
+常言說得好
+常言说
+常言说得好
+常言道
+幫助
+平素
+年
+年复一年
+年復一年
+并
+并不
+并不是
+并且
+并排
+并无
+并没
+并没有
+并肩
+并非
+幾
+幾乎
+幾度
+幾時
+幾番
+幾經
+广大
+广泛
+应当
+应用
+应该
+庶乎
+庶几
+庶幾
+廣大
+廣泛
+开外
+开始
+开展
+引起
+弗
+強烈
+強調
+弹指之间
+强烈
+强调
+彈指之間
+归
+归根到底
+归根结底
+归齐
+当
+当下
+当中
+当儿
+当前
+当即
+当口儿
+当地
+当场
+当头
+当庭
+当时
+当然
+当真
+当着
+形成
+彻夜
+彻底
+彼
+彼时
+彼時
+彼此
+往
+往往
+待
+待到
+很
+很多
+很少
+後
+後來
+後来
+後者
+後面
+得
+得了
+得出
+得到
+得天独厚
+得天獨厚
+得起
+從
+從不
+從中
+從事
+從今以後
+從來
+從優
+從古到今
+從古至今
+從嚴
+從寬
+從小
+從新
+從早到晚
+從未
+從此
+從此以後
+從無到有
+從而
+從輕
+從速
+從重
+從頭
+徹夜
+徹底
+心裡
+心里
+必
+必定
+必将
+必將
+必然
+必要
+必須
+必须
+快
+快要
+忽地
+忽然
+怎
+怎么
+怎么办
+怎么样
+怎奈
+怎样
+怎樣
+怎麼
+怎麼樣
+怎麼辦
+怎麽
+怕
+急匆匆
+怪
+怪不得
+总之
+总是
+总的来看
+总的来说
+总的说来
+总结
+总而言之
+恍然
+恐怕
+恰似
+恰好
+恰如
+恰巧
+恰恰
+恰恰相反
+恰逢
+您
+您们
+您們
+您是
+惟其
+惯常
+意思
+愤然
+愿意
+慢說
+慢説
+慢说
+慣常
+憑
+憑藉
+憤然
+應用
+應當
+應該
+成为
+成年
+成年累月
+成心
+成為
+我
+我们
+我們
+我是
+我的
+或
+或则
+或則
+或多或少
+或是
+或曰
+或者
+或許
+或许
+战斗
+截然
+截至
+戰鬥
+所
+所以
+所在
+所幸
+所有
+所謂
+所谓
+才
+才能
+扑通
+打
+打从
+打开天窗说亮话
+打從
+打開天窗說亮話
+扩大
+把
+抑或
+报导
+报道
+抽冷子
+拦腰
+拿
+指
+指出
+按
+按时
+按時
+按期
+按照
+按理
+按說
+按说
+挨个
+挨個
+挨家挨戶
+挨家挨户
+挨次
+挨着
+挨著
+挨門挨戶
+挨門逐戶
+挨门挨户
+挨门逐户
+换句话说
+换言之
+据
+据实
+据悉
+据我所知
+据此
+据称
+据说
+掌握
+採取
+接下來
+接下来
+接着
+接著
+接连不断
+接連不斷
+換句話說
+換句話説
+換言之
+撲通
+據
+據實
+據悉
+據我所知
+據此
+據稱
+據說
+擴大
+攔腰
+放量
+故
+故意
+故此
+故而
+敞开儿
+敞開兒
+敢
+敢于
+敢情
+敢於
+数/
+整个
+整個
+數/
+断然
+斷然
+方
+方便
+方才
+方能
+方面
+於
+於是
+於是乎
+旁人
+无
+无宁
+无法
+无论
+既
+既...又
+既往
+既是
+既然
+日
+日复一日
+日復一日
+日渐
+日漸
+日益
+日臻
+日見
+日见
+时
+时候
+昂然
+明显
+明确
+明確
+明顯
+是
+是不是
+是以
+是否
+是的
+显然
+显著
+時
+時候
+普通
+普遍
+暗中
+暗地裡
+暗地里
+暗自
+更
+更为
+更加
+更為
+更进一步
+更進一步
+曾
+曾經
+曾经
+替
+替代
+最
+最后
+最大
+最好
+最後
+最近
+最高
+會
+月
+有
+有些
+有关
+有利
+有力
+有及
+有所
+有效
+有时
+有時
+有点
+有的
+有的是
+有着
+有著
+有關
+有點
+望
+朝
+朝着
+朝著
+末##末
+本
+本人
+本地
+本着
+本著
+本身
+权时
+来
+来不及
+来得及
+来看
+来着
+来自
+来讲
+来说
+极
+极为
+极了
+极其
+极力
+极大
+极度
+极端
+构成
+果然
+果真
+某
+某个
+某些
+某個
+某某
+根据
+根據
+根本
+格外
+梆
+極
+極了
+極其
+極力
+極大
+極度
+極為
+極端
+概
+構成
+權時
+次第
+欢迎
+欤
+歟
+歡迎
+正值
+正在
+正如
+正巧
+正常
+正是
+此
+此中
+此后
+此地
+此处
+此外
+此後
+此时
+此時
+此次
+此處
+此間
+此间
+歷
+歸
+歸根到底
+歸根結底
+歸齊
+殆
+毋宁
+毋寧
+每
+每个
+每個
+每天
+每年
+每当
+每时每刻
+每時每刻
+每每
+每當
+每逢
+比
+比及
+比如
+比如說
+比如说
+比方
+比照
+比起
+比較
+比较
+毕竟
+毫不
+毫无
+毫无例外
+毫无保留地
+毫無
+毫無例外
+毫無保留地
+汝
+決不
+決定
+決非
+沒
+沒奈何
+沒有
+沙沙
+没
+没奈何
+没有
+沿
+沿着
+沿著
+況且
+注意
+活
+深入
+清楚
+湊巧
+準備
+满
+满足
+滿
+滿足
+漫說
+漫説
+漫说
+為
+為主
+為了
+為什麼
+為什麽
+為何
+為止
+為此
+為著
+烏乎
+焉
+無
+無寧
+無法
+無論
+然
+然则
+然則
+然后
+然後
+然而
+照
+照着
+照著
+爭取
+爲了
+爲什麼
+爲何
+爲甚麼
+爲着
+爲著
+爾
+爾後
+爾爾
+爾等
+牢牢
+特別是
+特别是
+特殊
+特点
+特約
+特约
+特點
+犹且
+犹自
+独
+独媒特约
+独自
+猛然
+猛然間
+猛然间
+猶且
+猶自
+獨
+獨媒特約
+獨自
+獲得
+率尔
+率然
+率爾
+现代
+现在
+現代
+現在
+理应
+理当
+理應
+理當
+理該
+理该
+瑟瑟
+甚且
+甚么
+甚或
+甚而
+甚至
+甚至于
+甚至於
+甚麼
+甚麼樣
+甚麽
+產生
+用
+用來
+用来
+甫
+甭
+由
+由于
+由於
+由是
+由此
+由此可見
+由此可见
+畢竟
+略
+略为
+略加
+略微
+略為
+當
+當下
+當中
+當兒
+當前
+當即
+當口兒
+當地
+當場
+當庭
+當時
+當然
+當真
+當着
+當著
+當頭
+白
+白白
+的
+的确
+的確
+的話
+的话
+皆可
+盡
+盡可能
+盡如人意
+盡心盡力
+盡心竭力
+盡快
+盡然
+盡量
+目前
+直到
+直接
+相似
+相信
+相反
+相同
+相对
+相对而言
+相對
+相對而言
+相应
+相当
+相應
+相當
+相等
+省得
+看
+看上去
+看來
+看出
+看到
+看来
+看样子
+看樣子
+看看
+看見
+看见
+看起來
+看起来
+真是
+真正
+眨眼
+着
+着呢
+矣
+矣乎
+矣哉
+知道
+砰
+确定
+碰巧
+確定
+社会主义
+社會主義
+离
+种
+积极
+称
+移动
+移動
+種
+稱
+積極
+究竟
+穷年累月
+突出
+突然
+窃
+窮年累月
+竊
+立
+立刻
+立即
+立地
+立时
+立時
+立馬
+立马
+竟
+竟然
+竟而
+第
+第二
+等
+等到
+等等
+策略地
+简直
+简而言之
+简言之
+管
+範圍
+簡直
+簡而言之
+簡言之
+类如
+粗
+精光
+純
+純粹
+紧接着
+累年
+累次
+組成
+結合
+結果
+絕
+絕不
+絕對
+絕非
+絕頂
+給
+經
+經常
+經過
+綜上所述
+維持
+緊接著
+練習
+縱
+縱令
+縱使
+縱然
+縷縷
+總之
+總括來説
+總括而言
+總是
+總的來看
+總的來說
+總的來説
+總的說來
+總的説來
+總結
+總而言之
+繼之
+繼後
+繼續
+繼而
+纯
+纯粹
+纵
+纵令
+纵使
+纵然
+练习
+组成
+经
+经常
+经过
+结合
+结果
+给
+绝
+绝不
+绝对
+绝非
+绝顶
+继之
+继后
+继续
+继而
+维持
+综上所述
+缕缕
+罢了
+罷了
+老
+老大
+老是
+老老实实
+老老實實
+考慮
+考虑
+者
+而
+而且
+而况
+而又
+而后
+而外
+而已
+而後
+而是
+而況
+而言
+而論
+而论
+联系
+联袂
+聯繫
+聯袂
+背地裡
+背地里
+背靠背
+能
+能否
+能够
+能夠
+腾
+臨
+臨到
+自
+自个儿
+自从
+自個兒
+自各儿
+自各兒
+自后
+自家
+自己
+自後
+自從
+自打
+自身
+臭
+至
+至于
+至今
+至於
+至若
+致
+與
+與其
+與其說
+與否
+與此同時
+舉凡
+舉行
+般的
+良好
+若
+若夫
+若是
+若果
+若非
+范围
+莫
+莫不
+莫不然
+莫如
+莫若
+莫非
+获得
+萬一
+著
+著呢
+藉以
+藉此
+處在
+處理
+處處
+虽
+虽则
+虽然
+虽说
+蛮
+蠻
+行为
+行动
+行動
+行為
+衝
+表明
+表示
+被
+裡面
+複雜
+要
+要不
+要不是
+要不然
+要么
+要是
+要求
+要麼
+見
+規定
+親口
+親手
+親眼
+親自
+親身
+覺得
+见
+规定
+觉得
+設使
+設或
+設若
+許多
+話說
+該
+該當
+認為
+認爲
+認真
+認識
+誠如
+誠然
+說
+說來
+說明
+說說
+誰
+誰人
+誰料
+誰知
+請勿
+論
+論說
+諸
+諸位
+諸如
+謹
+譬喻
+譬如
+變成
+讓
+认为
+认真
+认识
+让
+许多
+论
+论说
+设使
+设或
+设若
+诚如
+诚然
+话说
+该
+该当
+说
+说明
+说来
+说说
+请勿
+诸
+诸位
+诸如
+谁
+谁人
+谁料
+谁知
+谨
+豁然
+豈
+豈但
+豈止
+豈非
+豐富
+賊死
+賴以
+贼死
+赖以
+赶
+赶快
+赶早不赶晚
+起
+起來
+起先
+起初
+起头
+起来
+起見
+起见
+起頭
+起首
+趁
+趁便
+趁势
+趁勢
+趁早
+趁机
+趁機
+趁热
+趁熱
+趁着
+趁著
+越是
+趕
+趕快
+趕早不趕晚
+距
+跟
+路經
+路经
+較
+較之
+較比
+較為
+轉動
+轉變
+轉貼
+轟然
+转动
+转变
+转贴
+轰然
+较
+较为
+较之
+较比
+边
+达到
+达旦
+迄
+迅速
+过
+过于
+过去
+过来
+运用
+近
+近來
+近几年来
+近年來
+近年来
+近幾年來
+近来
+还
+还是
+还有
+还要
+这
+这一来
+这个
+这么
+这么些
+这么样
+这么点儿
+这些
+这会儿
+这儿
+这就是说
+这时
+这样
+这次
+这点
+这种
+这般
+这边
+这里
+这麽
+进入
+进去
+进来
+进步
+进而
+进行
+连
+连同
+连声
+连日
+连日来
+连袂
+连连
+迟早
+迫于
+迫於
+适应
+适当
+适用
+逐步
+逐渐
+逐漸
+這
+這一來
+這些
+這個
+這兒
+這就是說
+這就是説
+這時
+這會兒
+這樣
+這次
+這種
+這般
+這裏
+這裡
+這邊
+這麼
+這麼些
+這麼樣
+這麼點兒
+這麽
+這點
+通常
+通过
+通過
+造成
+逢
+連
+連同
+連日
+連日來
+連聲
+連袂
+連連
+進來
+進入
+進去
+進步
+進而
+進行
+遇到
+運用
+過
+過來
+過去
+過於
+達到
+達旦
+適應
+適用
+適當
+遭到
+遲早
+遵循
+遵照
+避免
+還
+還是
+還有
+還要
+邊
+那
+那个
+那么
+那么些
+那么样
+那些
+那会儿
+那個
+那儿
+那兒
+那时
+那時
+那會兒
+那末
+那样
+那樣
+那般
+那裏
+那裡
+那边
+那邊
+那里
+那麼
+那麼些
+那麼樣
+那麽
+部分
+都
+鄙人
+采取
+里面
+重大
+重新
+重要
+針對
+鉴于
+鑑於
+鑒於
+针对
+長期以來
+長此下去
+長線
+長話短說
+长期以来
+长此下去
+长线
+长话短说
+開外
+開始
+開展
+間或
+關於
+问题
+间或
+防止
+阿
+附近
+陈年
+限制
+陡然
+除
+除了
+除却
+除卻
+除去
+除外
+除开
+除此
+除此之外
+除此以外
+除此而外
+除開
+除非
+陳年
+随
+随后
+随时
+随着
+随著
+隔夜
+隔日
+隨
+隨後
+隨時
+隨著
+难得
+难怪
+难说
+难道
+难道说
+集中
+雖
+雖則
+雖然
+雖說
+雖説
+雙方
+離
+難得
+難怪
+難說
+難道
+難道說
+雲爾
+零
+需要
+非但
+非常
+非徒
+非得
+非特
+非独
+非獨
+靠
+鞏固
+頂多
+頃
+頃刻
+頃刻之間
+頃刻間
+順
+順着
+順著
+頓時
+頗
+願意
+類如
+顯然
+顯著
+顶多
+顷
+顷刻
+顷刻之间
+顷刻间
+顺
+顺着
+顿时
+颇
+風雨無阻
+风雨无阻
+飽
+餘外
+餵
+饱
+首先
+馬上
+騰
+马上
+高低
+高兴
+高興
+麼
+默然
+默默地
+齊
+齐
+︿
+！
+＃
+＄
+％
+＆
+＇
+（
+）
+）÷（１－
+）、
+＊
+＋
+＋ξ
+＋＋
+，
+，也
+－
+－β
+－－
+－［＊］－
+．
+／
+０
+０：２
+１
+１．
+１２％
+２
+２．３％
+３
+４
+５
+５：０
+６
+７
+８
+９
+：
+；
+＜
+＜±
+＜Δ
+＜λ
+＜φ
+＜＜
+＝
+＝″
+＝☆
+＝（
+＝－
+＝［
+＝｛
+＞
+＞λ
+？
+＠
+［
+［①①］
+［①②］
+［①③］
+［①④］
+［①⑤］
+［①⑥］
+［①⑦］
+［①⑧］
+［①⑨］
+［①］
+［①Ａ］
+［①Ｂ］
+［①Ｃ］
+［①Ｄ］
+［①Ｅ］
+［①ｆ］
+［①ｇ］
+［①ｈ］
+［①ｉ］
+［①ｏ］
+［②
+［②①］
+［②②］
+［②③］
+［②④
+［②⑤］
+［②⑥］
+［②⑦］
+［②⑧］
+［②⑩］
+［②］
+［②ａ］
+［②Ｂ］
+［②ｃ］
+［②ｄ］
+［②ｅ］
+［②ｆ］
+［②Ｇ］
+［②ｈ］
+［②ｉ］
+［②ｊ］
+［③①］
+［③⑩］
+［③］
+［③ａ］
+［③ｂ］
+［③ｃ］
+［③ｄ］
+［③ｅ］
+［③Ｆ］
+［③ｇ］
+［③ｈ］
+［④］
+［④ａ］
+［④ｂ］
+［④ｃ］
+［④ｄ］
+［④ｅ］
+［⑤］
+［⑤］］
+［⑤ａ］
+［⑤ｂ］
+［⑤ｄ］
+［⑤ｅ］
+［⑤ｆ］
+［⑥］
+［⑦］
+［⑧］
+［⑨］
+［⑩］
+［＊］
+［－
+［］
+］
+］∧′＝［
+］［
+＿
+Ａ
+ａ］
+ｂ］
+ｃ］
+ｅ］
+ｆ］
+ＬＩ
+ｎｇ昉
+Ｒ． Ｌ．
+Ｒ．Ｌ．
+ＺＸＦＩＴＬ
+｛
+｛－
+｜
+｝
+｝＞
+～
+～±
+～＋
+￥
\ No newline at end of file
diff --git a/apps/common/src/python/mediawords/util/config/common.py b/apps/common/src/python/mediawords/util/config/common.py
index 114514a52c..c0f117393c 100644
--- a/apps/common/src/python/mediawords/util/config/common.py
+++ b/apps/common/src/python/mediawords/util/config/common.py
@@ -1,9 +1,10 @@
 import collections
 import re
-from typing import List, Pattern, Optional
+from typing import List, Pattern, Optional, Union
 
 from mediawords.util.config import env_value, McConfigException
 from mediawords.util.parse_json import decode_json, McDecodeJSONException
+from mediawords.util.perl import decode_object_from_bytes_if_needed
 from mediawords.util.log import create_logger
 
 log = create_logger(__name__)
@@ -12,54 +13,110 @@
 class ConnectRetriesConfig(object):
     """Connect retries configuration."""
 
-    @staticmethod
-    def sleep_between_attempts() -> float:
+    __slots__ = [
+        '__sleep_between_attempts',
+        '__max_attempts',
+        '__fatal_error_on_failure',
+    ]
+
+    def __init__(self,
+                 sleep_between_attempts: float = 1.0,
+                 max_attempts: int = 60,
+                 fatal_error_on_failure: bool = True):
+
+        if isinstance(sleep_between_attempts, bytes):
+            sleep_between_attempts = decode_object_from_bytes_if_needed(sleep_between_attempts)
+        if isinstance(max_attempts, bytes):
+            max_attempts = decode_object_from_bytes_if_needed(max_attempts)
+        if isinstance(fatal_error_on_failure, bytes):
+            fatal_error_on_failure = decode_object_from_bytes_if_needed(fatal_error_on_failure)
+
+        self.__sleep_between_attempts = float(sleep_between_attempts)
+        self.__max_attempts = int(max_attempts)
+        self.__fatal_error_on_failure = bool(fatal_error_on_failure)
+
+    def sleep_between_attempts(self) -> float:
         """Seconds (or parts of second) to sleep between retries."""
-        return 1.0
+        return self.__sleep_between_attempts
 
-    @staticmethod
-    def max_attempts() -> int:
+    def max_attempts(self) -> int:
         """Max. number of attempts to connect.
 
         Must be positive (we want to try connecting at least one time).
         """
-        return 60
+        return self.__max_attempts
+
+    def fatal_error_on_failure(self) -> bool:
+        """
+        Return True if connect_to_db() should call fatal_error() and thus stop the whole process when giving up.
+
+        True is a useful value in production when you might want the process that's unable to connect to the database to
+        just die. However, you might choose to return False here too if the caller is prepared to handle connection
+        failures more gracefully (e.g. Temporal's retries).
+        """
+        return self.__fatal_error_on_failure
 
 
 class DatabaseConfig(object):
     """PostgreSQL database configuration."""
 
-    @staticmethod
-    def hostname() -> str:
+    __slots__ = [
+        '__hostname',
+        '__port',
+        '__database_name',
+        '__username',
+        '__password',
+        '__retries',
+    ]
+
+    def __init__(self,
+                 hostname: str = 'postgresql-pgbouncer',
+                 port: int = 6432,
+                 database_name: str = 'mediacloud',
+                 username: str = 'mediacloud',
+                 password: str = 'mediacloud',
+                 retries: Optional[ConnectRetriesConfig] = None):
+        if not retries:
+            retries = ConnectRetriesConfig()
+
+        if isinstance(port, bytes):
+            port = decode_object_from_bytes_if_needed(port)
+
+        hostname = decode_object_from_bytes_if_needed(hostname)
+        database_name = decode_object_from_bytes_if_needed(database_name)
+        username = decode_object_from_bytes_if_needed(username)
+        password = decode_object_from_bytes_if_needed(password)
+
+        self.__hostname = hostname
+        self.__port = int(port)
+        self.__database_name = database_name
+        self.__username = username
+        self.__password = password
+        self.__retries = retries
+
+    def hostname(self) -> str:
         """Hostname."""
-        # Container's name from docker-compose.yml
-        return "postgresql-pgbouncer"
+        return self.__hostname
 
-    @staticmethod
-    def port() -> int:
+    def port(self) -> int:
         """Port."""
-        # Container's exposed port from docker-compose.yml
-        return 6432
+        return self.__port
 
-    @staticmethod
-    def database_name() -> str:
+    def database_name(self) -> str:
         """Database name."""
-        return "mediacloud"
+        return self.__database_name
 
-    @staticmethod
-    def username() -> str:
+    def username(self) -> str:
         """Username."""
-        return "mediacloud"
+        return self.__username
 
-    @staticmethod
-    def password() -> str:
+    def password(self) -> str:
         """Password."""
-        return "mediacloud"
+        return self.__password
 
-    @staticmethod
-    def retries() -> ConnectRetriesConfig:
+    def retries(self) -> ConnectRetriesConfig:
         """connect_to_db() retries configuration."""
-        return ConnectRetriesConfig()
+        return self.__retries
 
 
 class AmazonS3DownloadsConfig(object):
@@ -86,41 +143,117 @@ def directory_name() -> str:
         return env_value('MC_DOWNLOADS_AMAZON_S3_DIRECTORY_NAME', allow_empty_string=True)
 
 
+class RabbitMQRetriesConfig(object):
+    """
+    RabbitMQ retries configuration.
+
+    https://docs.celeryproject.org/en/v4.4.7/userguide/calling.html#calling-retry
+    """
+
+    __slots__ = [
+        '__max_retries',
+        '__interval_start',
+        '__interval_step',
+        '__interval_max',
+    ]
+
+    def __init__(self,
+                 max_retries: Optional[int] = 3,
+                 interval_start: Union[int, float] = 0,
+                 interval_step: Union[int, float] = 0.2,
+                 interval_max: Union[int, float] = 0.2):
+        if isinstance(max_retries, bytes):
+            max_retries = decode_object_from_bytes_if_needed(max_retries)
+        if isinstance(interval_start, bytes):
+            interval_start = decode_object_from_bytes_if_needed(interval_start)
+        if isinstance(interval_step, bytes):
+            interval_step = decode_object_from_bytes_if_needed(interval_step)
+        if isinstance(interval_max, bytes):
+            interval_max = decode_object_from_bytes_if_needed(interval_max)
+
+        self.__max_retries = None if max_retries is None else int(max_retries)  # We want to preserve None here
+        self.__interval_start = float(interval_start)
+        self.__interval_step = float(interval_step)
+        self.__interval_max = float(interval_max)
+
+    def max_retries(self) -> Optional[int]:
+        return self.__max_retries
+
+    def interval_start(self) -> float:
+        return self.__interval_start
+
+    def interval_step(self) -> float:
+        return self.__interval_step
+
+    def interval_max(self) -> float:
+        return self.__interval_max
+
+
 class RabbitMQConfig(object):
     """RabbitMQ (Celery broker) client configuration."""
 
-    @staticmethod
-    def hostname() -> str:
+    __slots__ = [
+        '__hostname',
+        '__port',
+        '__username',
+        '__password',
+        '__vhost',
+        '__timeout',
+        '__retries',
+    ]
+
+    def __init__(self,
+                 hostname: str = 'rabbitmq-server',
+                 port: int = 5672,
+                 username: str = 'mediacloud',
+                 password: str = 'mediacloud',
+                 vhost: str = '/mediacloud',
+                 timeout: int = 60,
+                 retries: Optional[RabbitMQRetriesConfig] = None):
+        hostname = decode_object_from_bytes_if_needed(hostname)
+        if isinstance(port, bytes):
+            port = decode_object_from_bytes_if_needed(port)
+        username = decode_object_from_bytes_if_needed(username)
+        password = decode_object_from_bytes_if_needed(password)
+        vhost = decode_object_from_bytes_if_needed(vhost)
+        if isinstance(timeout, bytes):
+            timeout = decode_object_from_bytes_if_needed(timeout)
+
+        self.__hostname = hostname
+        self.__port = int(port)
+        self.__username = username
+        self.__password = password
+        self.__vhost = vhost
+        self.__timeout = int(timeout)
+        self.__retries = retries
+
+    def hostname(self) -> str:
         """Hostname."""
-        # Container's name from docker-compose.yml
-        return "rabbitmq-server"
+        return self.__hostname
 
-    @staticmethod
-    def port() -> int:
+    def port(self) -> int:
         """Port."""
-        # Container's exposed port from docker-compose.yml
-        return 5672
+        return self.__port
 
-    @staticmethod
-    def username() -> str:
+    def username(self) -> str:
         """Username."""
-        return "mediacloud"
+        return self.__username
 
-    @staticmethod
-    def password() -> str:
+    def password(self) -> str:
         """Password."""
-        return "mediacloud"
+        return self.__password
 
-    @staticmethod
-    def vhost() -> str:
+    def vhost(self) -> str:
         """Virtual host."""
-        return "/mediacloud"
+        return self.__vhost
 
-    @staticmethod
-    def timeout() -> int:
+    def timeout(self) -> int:
         """Timeout."""
-        # FIXME possibly hardcode it somewhere
-        return 60
+        return self.__timeout
+
+    def retries(self) -> Optional[RabbitMQRetriesConfig]:
+        """Retry policy; if None, retries are disabled."""
+        return self.__retries
 
 
 class SMTPConfig(object):
@@ -155,6 +288,14 @@ def password() -> str:
         """Password."""
         return ''
 
+    @staticmethod
+    def unsubscribe_address() -> str:
+        """Email to which unsubscribe/account deletion requests should be sent"""
+        address = env_value('MC_EMAIL_UNSUBSCRIBE', required=False, allow_empty_string=True)
+        if address is None or '@' not in address:
+            address = 'support@example.com'
+        return address
+
 
 class DownloadStorageConfig(object):
     """Download storage configuration."""
diff --git a/apps/common/src/python/mediawords/util/mail.py b/apps/common/src/python/mediawords/util/mail.py
index 3e75702f6d..9bea5ec1ed 100644
--- a/apps/common/src/python/mediawords/util/mail.py
+++ b/apps/common/src/python/mediawords/util/mail.py
@@ -117,6 +117,12 @@ def send_email(message: Message) -> bool:
             message_part = MIMEText(message.text_body, 'plain', 'utf-8')
             mime_message.attach(message_part)
 
+        unsubscribe_address = CommonConfig.smtp().unsubscribe_address()
+        
+        mime_message.add_header(
+            'List-Unsubscribe', 
+             f'mailto:{unsubscribe_address}?subject=Delete%20account%20and%20unsubscribe')
+
         # HTML gets attached last, thus making it a preferred part as per RFC
         if message.html_body:
             message_part = MIMEText(message.html_body, 'html', 'utf-8')
diff --git a/apps/podcast-fetch-episode/src/python/podcast_fetch_episode/__init__.py b/apps/common/src/python/mediawords/workflow/__init__.py
similarity index 100%
rename from apps/podcast-fetch-episode/src/python/podcast_fetch_episode/__init__.py
rename to apps/common/src/python/mediawords/workflow/__init__.py
diff --git a/apps/common/src/python/mediawords/workflow/client.py b/apps/common/src/python/mediawords/workflow/client.py
new file mode 100644
index 0000000000..560ead779e
--- /dev/null
+++ b/apps/common/src/python/mediawords/workflow/client.py
@@ -0,0 +1,22 @@
+from temporal.workflow import WorkflowClient
+
+from mediawords.util.network import wait_for_tcp_port_to_open
+
+
+def workflow_client(namespace: str = 'default') -> WorkflowClient:
+    """
+    Connect to Temporal server and return its client.
+
+    :param namespace: Namespace to connect to.
+    :return: WorkflowClient instance.
+    """
+
+    host = 'temporal-server'
+    port = 7233
+
+    # It's super lame to wait for this port to open, but the Python SDK seems to fail otherwise
+    wait_for_tcp_port_to_open(hostname=host, port=port)
+
+    client = WorkflowClient.new_client(host=host, port=port, namespace=namespace)
+
+    return client
diff --git a/apps/common/src/python/mediawords/workflow/exceptions.py b/apps/common/src/python/mediawords/workflow/exceptions.py
new file mode 100644
index 0000000000..46fb109058
--- /dev/null
+++ b/apps/common/src/python/mediawords/workflow/exceptions.py
@@ -0,0 +1,80 @@
+"""
+Custom exceptions used for reporting various errors back to the workflow.
+"""
+
+import abc
+
+
+class _AbstractMcWorkflowError(Exception, metaclass=abc.ABCMeta):
+    """Abstract exception."""
+    pass
+
+
+class McProgrammingError(_AbstractMcWorkflowError):
+    """
+    Exception thrown on programming errors.
+
+    It's pointless to retry actions that have caused this error as we need to fix some code first, and it might be a
+    good idea to stop whatever we're doing altogether.
+
+    Examples include:
+
+    * Various third party APIs returning something that our code can't understand.
+    * Files existing where they're not supposed to exist.
+    * Typos in SQL commands.
+    * Assertions.
+    """
+    pass
+
+
+class McConfigurationError(_AbstractMcWorkflowError):
+    """
+    Exception thrown when something is misconfigured.
+
+    Different from McProgrammingError in that we can figure out that there's a configuration problem somewhere almost
+    immediately upon start, while a programming error can take some time to show up (e.g. some sort of an external API
+    doesn't work with particular inputs, or the temporary directory can't be written to anymore because we wrote too
+    many files in it).
+
+    No reason to retry whatever has caused this error as someone needs to fix the configuration first, and one should
+    consider stopping whatever we're doing as there's no point in continuing without valid configuration anyway.
+
+    Examples include:
+
+    * Configuration environment variables not set / set to invalid values.
+    * Bad authentication credentials.
+    * Invalid arguments passed.
+    """
+    pass
+
+
+class McTransientError(_AbstractMcWorkflowError):
+    """
+    Exception thrown on transient (temporary, likely to clear up on its own) errors.
+
+    It is reasonable to expect that when this error occurs, we can wait for a bit, retry and the action might succeed.
+
+    Examples include:
+
+    * Not being able to connect to the database.
+    * HTTP server responding with "503 Service Unavailable".
+    * Network being down.
+    """
+    pass
+
+
+class McPermanentError(_AbstractMcWorkflowError):
+    """
+    Exception thrown when some expectations of the application were unmet so it can't proceed with a specific input but
+    it's likely that it will be able to process other inputs.
+
+    There's nothing wrong with the code that does the processing, and we can continue on processing other inputs, but
+    there's no way to continue processing this particular input or retrying on this error.
+
+    Examples include:
+
+    * One of the stories that's to be processed does not exist at all.
+    * HTTP server responding with "404 Not Found".
+    * Downloaded media file turns out to not be a media file at all.
+    """
+    pass
diff --git a/apps/common/src/python/mediawords/workflow/worker.py b/apps/common/src/python/mediawords/workflow/worker.py
new file mode 100644
index 0000000000..86178f6f19
--- /dev/null
+++ b/apps/common/src/python/mediawords/workflow/worker.py
@@ -0,0 +1,16 @@
+import asyncio
+
+from temporal.worker import Worker
+
+
+async def stop_worker_faster(worker: Worker) -> None:
+    """
+    Request the worker to stop and wait until its `threads_stopped` matches `threads_started`.
+
+    Default implementation of worker.stop() sleeps for 5 seconds between retries; we poll every 0.5 seconds instead.
+
+    :param worker: Worker instance to stop
+    """
+    worker.stop_requested = True
+    while worker.threads_stopped != worker.threads_started:
+        await asyncio.sleep(0.5)
diff --git a/apps/common/src/requirements.txt b/apps/common/src/requirements.txt
index 96c3385b19..3347d75565 100644
--- a/apps/common/src/requirements.txt
+++ b/apps/common/src/requirements.txt
@@ -44,14 +44,14 @@ furl==2.1.0
 jieba==0.42.1
 
 # Parsing email templates
-Jinja2==2.11.2
+Jinja2==2.11.3
 
 # One of Celery's dependencies (here just for PyCharm to stop complaining)
 # Upgrade together with Celery and not separately.
 kombu==4.6.11
 
 # XML manipulations, HTML parsing
-lxml==4.6.2
+lxml==4.6.3
 
 # Japanese language tokenizer, stemmer, etc.
 mecab-python3==1.0.3
@@ -71,6 +71,9 @@ PyStemmer==2.0.1
 # Unit tests
 pytest==6.2.2
 
+# asyncio tests
+pytest-asyncio==0.15.1
+
 # Timezone handling
 pytz==2020.5
 
@@ -89,8 +92,11 @@ sentence_splitter==1.4
 # Celery PostgreSQL result backend support
 sqlalchemy==1.3.22
 
+# Temporal's Python SDK
+git+https://github.com/firdaus/temporal-python-sdk.git@8604d025ae1272b592d3d4dd430acd15eeb6562a#egg=temporal-python-sdk
+
 # Normalizing URLs
 url_normalize==1.4.3
 
 # Low level HTTP requests (with SSL certificate verification)
-urllib3[secure]==1.26.3
+urllib3[secure]==1.26.5
diff --git a/apps/common/tests/python/mediawords/job/setup_broker_test.py b/apps/common/tests/python/mediawords/job/setup_broker_test.py
index ca85f90b56..c7503eb019 100644
--- a/apps/common/tests/python/mediawords/job/setup_broker_test.py
+++ b/apps/common/tests/python/mediawords/job/setup_broker_test.py
@@ -39,12 +39,12 @@ class AbstractBrokerTestCase(TestCase, metaclass=abc.ABCMeta):
     @classmethod
     @abc.abstractmethod
     def worker_paths(cls) -> List[Worker]:
-        raise NotImplemented("Abstract method")
+        raise NotImplementedError("Abstract method")
 
     @classmethod
     @abc.abstractmethod
     def broker_class(cls) -> Type[JobBroker]:
-        raise NotImplemented("Abstract method")
+        raise NotImplementedError("Abstract method")
 
     @classmethod
     def setUpClass(cls) -> None:
diff --git a/apps/common/tests/python/mediawords/languages/test_lt.py b/apps/common/tests/python/mediawords/languages/test_lt.py
index 87aa142519..5ecb4a916a 100644
--- a/apps/common/tests/python/mediawords/languages/test_lt.py
+++ b/apps/common/tests/python/mediawords/languages/test_lt.py
@@ -17,7 +17,7 @@ def test_sample_sentence(self):
 
     def test_stop_words_map(self):
         stop_words = self.__tokenizer.stop_words_map()
-        assert "buvo" in stop_words
+        assert "dargi" in stop_words
         assert "not_a_stopword" not in stop_words
 
     def test_stem(self):
diff --git a/apps/common/tests/python/mediawords/languages/test_pt.py b/apps/common/tests/python/mediawords/languages/test_pt.py
index b385cad73b..cec4e0c41a 100644
--- a/apps/common/tests/python/mediawords/languages/test_pt.py
+++ b/apps/common/tests/python/mediawords/languages/test_pt.py
@@ -17,7 +17,7 @@ def test_sample_sentence(self):
 
     def test_stop_words_map(self):
         stop_words = self.__tokenizer.stop_words_map()
-        assert "fãs" in stop_words
+        assert "abre" in stop_words
         assert "not_a_stopword" not in stop_words
 
     def test_stem(self):
diff --git a/apps/common/tests/python/mediawords/languages/test_sv.py b/apps/common/tests/python/mediawords/languages/test_sv.py
index 48c217e7e3..cd9f8a7fa4 100644
--- a/apps/common/tests/python/mediawords/languages/test_sv.py
+++ b/apps/common/tests/python/mediawords/languages/test_sv.py
@@ -17,7 +17,7 @@ def test_sample_sentence(self):
 
     def test_stop_words_map(self):
         stop_words = self.__tokenizer.stop_words_map()
-        assert "vår" in stop_words
+        assert "åttio" in stop_words
         assert "not_a_stopword" not in stop_words
 
     def test_stem(self):
diff --git a/apps/common/tests/python/mediawords/languages/test_zh.py b/apps/common/tests/python/mediawords/languages/test_zh.py
index 9e21730b36..ae0b55ee59 100644
--- a/apps/common/tests/python/mediawords/languages/test_zh.py
+++ b/apps/common/tests/python/mediawords/languages/test_zh.py
@@ -17,7 +17,7 @@ def test_sample_sentence(self):
 
     def test_stop_words_map(self):
         stop_words = self.__tokenizer.stop_words_map()
-        assert "不勝" in stop_words
+        assert "不起" in stop_words
         assert "not_a_stopword" not in stop_words
 
     def test_stem(self):
diff --git a/apps/crawler-ap/.idea/mediawords.sql b/apps/crawler-ap/.idea/mediawords.sql
new file mode 120000
index 0000000000..08fc9a64b9
--- /dev/null
+++ b/apps/crawler-ap/.idea/mediawords.sql
@@ -0,0 +1 @@
+../../postgresql-server/schema/mediawords.sql
\ No newline at end of file
diff --git a/apps/crawler-ap/.idea/sqlDataSources.xml b/apps/crawler-ap/.idea/sqlDataSources.xml
new file mode 100644
index 0000000000..51c3dd16dc
--- /dev/null
+++ b/apps/crawler-ap/.idea/sqlDataSources.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="SqlDataSourceStorage">
+    <option name="dataSources">
+      <list>
+        <State>
+          <option name="id" value="8e4582c6-932a-45cc-9069-db620a3d7cdb" />
+          <option name="name" value="DDL data source" />
+          <option name="urls">
+            <array>
+              <option value="file://$PROJECT_DIR$/.idea/mediawords.sql" />
+            </array>
+          </option>
+        </State>
+      </list>
+    </option>
+  </component>
+</project>
\ No newline at end of file
diff --git a/apps/crawler-ap/.idea/sqldialects.xml b/apps/crawler-ap/.idea/sqldialects.xml
index 790b3f37f8..92fefa2e78 100644
--- a/apps/crawler-ap/.idea/sqldialects.xml
+++ b/apps/crawler-ap/.idea/sqldialects.xml
@@ -1,6 +1,7 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
   <component name="SqlDialectMappings">
+    <file url="file://$PROJECT_DIR$/.idea/mediawords.sql" dialect="PostgreSQL" />
     <file url="file://$PROJECT_DIR$/../postgresql-server/schema/mediawords.sql" dialect="PostgreSQL" />
     <file url="PROJECT" dialect="PostgreSQL" />
   </component>
diff --git a/apps/crawler-ap/docker-compose.tests.yml b/apps/crawler-ap/docker-compose.tests.yml
index 0c7ecbf4f3..2ea0f17570 100644
--- a/apps/crawler-ap/docker-compose.tests.yml
+++ b/apps/crawler-ap/docker-compose.tests.yml
@@ -93,8 +93,8 @@ services:
               source: ./../postgresql-server/schema/
               target: /opt/mediacloud/schema/
             - type: bind
-              source: ./../postgresql-server/conf/
-              target: /etc/postgresql/11/main/
+              source: ./../postgresql-base/conf/
+              target: /etc/postgresql/13/main/
 
     rabbitmq-server:
         image: gcr.io/mcback/rabbitmq-server:latest
diff --git a/apps/crawler-fetcher/.idea/crawler-fetcher.iml b/apps/crawler-fetcher/.idea/crawler-fetcher.iml
index f29e2751d6..ff9e527d05 100644
--- a/apps/crawler-fetcher/.idea/crawler-fetcher.iml
+++ b/apps/crawler-fetcher/.idea/crawler-fetcher.iml
@@ -2,7 +2,7 @@
 <module type="PYTHON_MODULE" version="4">
   <component name="NewModuleRootManager">
     <content url="file://$MODULE_DIR$" />
-    <orderEntry type="jdk" jdkName="Remote Python 3.7.3 Docker Compose (crawler-fetcher at [/Users/pypt/Dropbox/etc-MediaCloud/trunk/apps/crawler-fetcher/docker-compose.tests.yml])" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Remote Python 3.8.5 Docker Compose (crawler-fetcher at [/home/pypt/m/apps/crawler-fetcher/docker-compose.tests.yml])" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="TestRunnerService">
diff --git a/apps/crawler-fetcher/.idea/mediawords.sql b/apps/crawler-fetcher/.idea/mediawords.sql
new file mode 120000
index 0000000000..08fc9a64b9
--- /dev/null
+++ b/apps/crawler-fetcher/.idea/mediawords.sql
@@ -0,0 +1 @@
+../../postgresql-server/schema/mediawords.sql
\ No newline at end of file
diff --git a/apps/crawler-fetcher/.idea/misc.xml b/apps/crawler-fetcher/.idea/misc.xml
index 4da3ef8ce4..0b6f459d16 100644
--- a/apps/crawler-fetcher/.idea/misc.xml
+++ b/apps/crawler-fetcher/.idea/misc.xml
@@ -6,5 +6,5 @@
   <component name="NodePackageJsonFileManager">
     <packageJsonPaths />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.7.3 Docker Compose (crawler-fetcher at [/Users/pypt/Dropbox/etc-MediaCloud/trunk/apps/crawler-fetcher/docker-compose.tests.yml])" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.8.5 Docker Compose (crawler-fetcher at [/home/pypt/m/apps/crawler-fetcher/docker-compose.tests.yml])" project-jdk-type="Python SDK" />
 </project>
\ No newline at end of file
diff --git a/apps/crawler-fetcher/.idea/sqlDataSources.xml b/apps/crawler-fetcher/.idea/sqlDataSources.xml
new file mode 100644
index 0000000000..f3e178e0d8
--- /dev/null
+++ b/apps/crawler-fetcher/.idea/sqlDataSources.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="SqlDataSourceStorage">
+    <option name="dataSources">
+      <list>
+        <State>
+          <option name="id" value="e40bfd77-fe92-41ac-bb01-e4520d76ab13" />
+          <option name="name" value="DDL data source" />
+          <option name="urls">
+            <array>
+              <option value="file://$PROJECT_DIR$/.idea/mediawords.sql" />
+            </array>
+          </option>
+        </State>
+      </list>
+    </option>
+  </component>
+</project>
\ No newline at end of file
diff --git a/apps/crawler-fetcher/docker-compose.tests.yml b/apps/crawler-fetcher/docker-compose.tests.yml
index bce5615b12..fb3cb718a7 100644
--- a/apps/crawler-fetcher/docker-compose.tests.yml
+++ b/apps/crawler-fetcher/docker-compose.tests.yml
@@ -56,8 +56,8 @@ services:
               source: ./../postgresql-server/schema/
               target: /opt/mediacloud/schema/
             - type: bind
-              source: ./../postgresql-server/conf/
-              target: /etc/postgresql/11/main/
+              source: ./../postgresql-base/conf/
+              target: /etc/postgresql/13/main/
 
     rabbitmq-server:
         image: gcr.io/mcback/rabbitmq-server:latest
diff --git a/apps/crawler-fetcher/src/python/crawler_fetcher/handler.py b/apps/crawler-fetcher/src/python/crawler_fetcher/handler.py
index 095e1fab3a..833d668d63 100644
--- a/apps/crawler-fetcher/src/python/crawler_fetcher/handler.py
+++ b/apps/crawler-fetcher/src/python/crawler_fetcher/handler.py
@@ -26,9 +26,9 @@ def fetch_download(self, db: DatabaseHandler, download: dict) -> Optional[Respon
         Return Response if the download had to be fetched and was, in fact, fetched; or return None if the download
         shouldn't / couldn't be fetched for whatever reason but no error is to be reported.
         """
-        raise NotImplemented("Abstract method.")
+        raise NotImplementedError("Abstract method.")
 
     def store_response(self, db: DatabaseHandler, download: dict, response: Response) -> None:
         """Store the download (response object) somehow, e.g. store it, parse if it is a feed, add new stories derived
         from it, etc."""
-        raise NotImplemented("Abstract method.")
+        raise NotImplementedError("Abstract method.")
diff --git a/apps/crawler-fetcher/src/python/crawler_fetcher/handlers/default/store_mixin.py b/apps/crawler-fetcher/src/python/crawler_fetcher/handlers/default/store_mixin.py
index 98042b2dc2..c779c1a49e 100644
--- a/apps/crawler-fetcher/src/python/crawler_fetcher/handlers/default/store_mixin.py
+++ b/apps/crawler-fetcher/src/python/crawler_fetcher/handlers/default/store_mixin.py
@@ -38,7 +38,7 @@ def store_download(self, db: DatabaseHandler, download: dict, content: str) -> L
           feed;
         * 'feed/web_page' downloads return a list with a single 'web_page' story to be extracted.
         """
-        raise NotImplemented("Abstract method")
+        raise NotImplementedError("Abstract method")
 
     def _store_failed_download_error_message(self, db: DatabaseHandler, download: dict, response: Response) -> None:
         """
diff --git a/apps/crawler-fetcher/src/python/crawler_fetcher/handlers/feed.py b/apps/crawler-fetcher/src/python/crawler_fetcher/handlers/feed.py
index d402c76a72..25f46d08a0 100644
--- a/apps/crawler-fetcher/src/python/crawler_fetcher/handlers/feed.py
+++ b/apps/crawler-fetcher/src/python/crawler_fetcher/handlers/feed.py
@@ -33,7 +33,7 @@ def add_stories_from_feed(self, db: DatabaseHandler, download: dict, content: st
 
         If helper returns an empty arrayref, '(redundant feed)' will be written instead of feed contents.
         """
-        raise NotImplemented("Abstract method")
+        raise NotImplementedError("Abstract method")
 
     @abc.abstractmethod
     def return_stories_to_be_extracted_from_feed(self, db: DatabaseHandler, download: dict, content: str) -> List[int]:
@@ -42,7 +42,7 @@ def return_stories_to_be_extracted_from_feed(self, db: DatabaseHandler, download
 
         For example, 'web_page' feed creates a single story for itself so it has to be extracted right away.
         """
-        raise NotImplemented("Abstract method")
+        raise NotImplementedError("Abstract method")
 
     def store_download(self, db: DatabaseHandler, download: dict, content: str) -> List[int]:
         download = decode_object_from_bytes_if_needed(download)
diff --git a/apps/crawler-fetcher/tests/python/setup_handler_test.py b/apps/crawler-fetcher/tests/python/setup_handler_test.py
index 27f0cd0b41..8c6f77b953 100644
--- a/apps/crawler-fetcher/tests/python/setup_handler_test.py
+++ b/apps/crawler-fetcher/tests/python/setup_handler_test.py
@@ -23,7 +23,7 @@ class TestDownloadHandler(TestCase, metaclass=abc.ABCMeta):
     @abc.abstractmethod
     def hashserver_pages(self) -> Dict[str, Any]:
         """Return HashServer pages to serve."""
-        raise NotImplemented("Abstract method")
+        raise NotImplementedError("Abstract method")
 
     def _fetch_and_handle_response(self, path: str, downloads_id: Optional[int] = None) -> Dict[str, Any]:
         """Call the fetcher and handler on the given URL. Return the download passed to the fetcher and handler."""
diff --git a/apps/crawler-fetcher/tests/python/setup_univision_test.py b/apps/crawler-fetcher/tests/python/setup_univision_test.py
index debb998431..5cf1d3a4eb 100644
--- a/apps/crawler-fetcher/tests/python/setup_univision_test.py
+++ b/apps/crawler-fetcher/tests/python/setup_univision_test.py
@@ -31,13 +31,13 @@ class AbstractUnivisionTest(object, metaclass=abc.ABCMeta):
     @abc.abstractmethod
     def univision_credentials(cls) -> Optional[UnivisionTestCredentials]:
         """Return test credentials to test Univision integration with, or None if you'd like the tests to be skipped."""
-        raise NotImplemented("Abstract method")
+        raise NotImplementedError("Abstract method")
 
     @classmethod
     @abc.abstractmethod
     def expect_to_find_some_stories(cls) -> bool:
         """If True, we should expect to find some stories in the downloaded feed."""
-        raise NotImplemented("Abstract method")
+        raise NotImplementedError("Abstract method")
 
     @classmethod
     def _mock_crawler_config(cls) -> CrawlerConfig:
diff --git a/apps/crawler-provider/.idea/crawler-provider.iml b/apps/crawler-provider/.idea/crawler-provider.iml
index 00c763162f..e64db4b116 100644
--- a/apps/crawler-provider/.idea/crawler-provider.iml
+++ b/apps/crawler-provider/.idea/crawler-provider.iml
@@ -2,7 +2,7 @@
 <module type="PYTHON_MODULE" version="4">
   <component name="NewModuleRootManager">
     <content url="file://$MODULE_DIR$" />
-    <orderEntry type="jdk" jdkName="Remote Python 3.7.3 Docker Compose (crawler-provider at [/Users/pypt/Dropbox/etc-MediaCloud/trunk/apps/crawler-provider/docker-compose.tests.yml])" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Remote Python 3.8.5 Docker Compose (crawler-provider at [/home/pypt/m/apps/crawler-provider/docker-compose.tests.yml])" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="TestRunnerService">
diff --git a/apps/crawler-provider/.idea/inspectionProfiles/Project_Default.xml b/apps/crawler-provider/.idea/inspectionProfiles/Project_Default.xml
new file mode 100644
index 0000000000..7c041470bb
--- /dev/null
+++ b/apps/crawler-provider/.idea/inspectionProfiles/Project_Default.xml
@@ -0,0 +1,6 @@
+<component name="InspectionProjectProfileManager">
+  <profile version="1.0">
+    <option name="myName" value="Project Default" />
+    <inspection_tool class="HttpUrlsUsage" enabled="false" level="WEAK WARNING" enabled_by_default="false" />
+  </profile>
+</component>
\ No newline at end of file
diff --git a/apps/crawler-provider/.idea/mediawords.sql b/apps/crawler-provider/.idea/mediawords.sql
new file mode 120000
index 0000000000..08fc9a64b9
--- /dev/null
+++ b/apps/crawler-provider/.idea/mediawords.sql
@@ -0,0 +1 @@
+../../postgresql-server/schema/mediawords.sql
\ No newline at end of file
diff --git a/apps/crawler-provider/.idea/misc.xml b/apps/crawler-provider/.idea/misc.xml
index 4ba9154104..ef27a6c4fa 100644
--- a/apps/crawler-provider/.idea/misc.xml
+++ b/apps/crawler-provider/.idea/misc.xml
@@ -6,5 +6,5 @@
   <component name="NodePackageJsonFileManager">
     <packageJsonPaths />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.7.3 Docker Compose (crawler-provider at [/Users/pypt/Dropbox/etc-MediaCloud/trunk/apps/crawler-provider/docker-compose.tests.yml])" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.8.5 Docker Compose (crawler-provider at [/home/pypt/m/apps/crawler-provider/docker-compose.tests.yml])" project-jdk-type="Python SDK" />
 </project>
\ No newline at end of file
diff --git a/apps/crawler-provider/.idea/sqlDataSources.xml b/apps/crawler-provider/.idea/sqlDataSources.xml
new file mode 100644
index 0000000000..05201c78f2
--- /dev/null
+++ b/apps/crawler-provider/.idea/sqlDataSources.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="SqlDataSourceStorage">
+    <option name="dataSources">
+      <list>
+        <State>
+          <option name="id" value="a0ad0860-25b6-436b-92bb-e1c8e3b741b4" />
+          <option name="name" value="DDL data source" />
+          <option name="urls">
+            <array>
+              <option value="file://$PROJECT_DIR$/.idea/mediawords.sql" />
+            </array>
+          </option>
+        </State>
+      </list>
+    </option>
+  </component>
+</project>
\ No newline at end of file
diff --git a/apps/crawler-provider/.idea/sqldialects.xml b/apps/crawler-provider/.idea/sqldialects.xml
index 790b3f37f8..92fefa2e78 100644
--- a/apps/crawler-provider/.idea/sqldialects.xml
+++ b/apps/crawler-provider/.idea/sqldialects.xml
@@ -1,6 +1,7 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
   <component name="SqlDialectMappings">
+    <file url="file://$PROJECT_DIR$/.idea/mediawords.sql" dialect="PostgreSQL" />
     <file url="file://$PROJECT_DIR$/../postgresql-server/schema/mediawords.sql" dialect="PostgreSQL" />
     <file url="PROJECT" dialect="PostgreSQL" />
   </component>
diff --git a/apps/crawler-provider/docker-compose.tests.yml b/apps/crawler-provider/docker-compose.tests.yml
index e1c6fa25b4..0cfd0cbdfd 100644
--- a/apps/crawler-provider/docker-compose.tests.yml
+++ b/apps/crawler-provider/docker-compose.tests.yml
@@ -49,5 +49,5 @@ services:
               source: ./../postgresql-server/schema/
               target: /opt/mediacloud/schema/
             - type: bind
-              source: ./../postgresql-server/conf/
-              target: /etc/postgresql/11/main/
+              source: ./../postgresql-base/conf/
+              target: /etc/postgresql/13/main/
diff --git a/apps/crawler-provider/src/python/crawler_provider/__init__.py b/apps/crawler-provider/src/python/crawler_provider/__init__.py
index 175b1d4f26..f15569b800 100644
--- a/apps/crawler-provider/src/python/crawler_provider/__init__.py
+++ b/apps/crawler-provider/src/python/crawler_provider/__init__.py
@@ -15,12 +15,13 @@
 
 The provider works as a daemon, periodically checking the size queued_downloads and only adding
 new jobs to the queue if there are more than MAX_QUEUE_SIZE jobs in the table.  This allows us to implement
-throttline by keeping the crawler jobs queue relatively small, thus limiting the number of requests for each
+throttling by keeping the crawler jobs queue relatively small, thus limiting the number of requests for each
 host over a period of several minutes, while allowing the crawler_fetcher jobs to acts as simple stupid
 worker jobs that just do a quick query of queued_downloads to grab the oldest queued download.
 """
 
 import time
+from typing import List, Any, Iterator
 
 from mediawords.db import DatabaseHandler
 from mediawords.util.log import create_logger
@@ -97,7 +98,7 @@ def _add_stale_feeds(db: DatabaseHandler) -> None:
             -- Feed was downloaded more than stale_feed_interval seconds ago
             OR (last_attempted_download_time < (NOW() - (%(a)s || ' seconds')::interval))
 
-            -- (Probably) if a new story comes in every "n" seconds, refetch feed every "n" + 5 minutes
+            -- (Probably) if a new story comes in every "n" seconds, re-fetch feed every "n" + 5 minutes
             OR (
                 (NOW() > last_attempted_download_time +
                         (last_attempted_download_time - last_new_story_time) + interval '5 minutes')
@@ -114,11 +115,13 @@ def _add_stale_feeds(db: DatabaseHandler) -> None:
 
     db.query(
         """
+        -- noinspection SqlResolve @ table/"feeds_to_queue"
         UPDATE feeds
         SET last_attempted_download_time = NOW()
         WHERE feeds_id IN (SELECT feeds_id FROM feeds_to_queue)
         """)
 
+    # noinspection SqlResolve,SqlCheckUsingColumns
     downloads = db.query(
         """
         WITH inserted_downloads as (
@@ -141,12 +144,15 @@ def _add_stale_feeds(db: DatabaseHandler) -> None:
                 join feeds f using (feeds_id)
         """).hashes()
 
-    db.query("drop table feeds_to_queue")
+    db.query("""
+        -- noinspection SqlResolveForFile
+        drop table feeds_to_queue
+    """)
 
-    log.info("added stale feeds: %d" % len(downloads))
+    log.info(f"Added stale feeds: {len(downloads)}")
 
 
-def provide_download_ids(db: DatabaseHandler) -> None:
+def provide_download_ids(db: DatabaseHandler) -> List[int]:
     """Return a list of pending downloads ids to queue for fetching.
 
     Hand out one downloads_id for each distinct host with a pending download.
@@ -158,17 +164,49 @@ def provide_download_ids(db: DatabaseHandler) -> None:
 
     _add_stale_feeds(db)
 
-    log.info("querying pending downloads ...")
-
-    # get one downloads_id per host, ordered by priority asc, downloads_id desc, do this through a plpgsql
-    # function because that's the only way to avoid an index scan of the entire (host, priority, downloads_id) index
-    downloads_ids = db.query("select get_downloads_for_queue() downloads_id").flat()
+    log.info("Querying pending downloads...")
+
+    # get one downloads_id per host, ordered by priority asc, downloads_id desc
+    # noinspection SqlResolve
+    downloads_ids = db.query("""
+
+        -- Pending downloads by host, ranked by priority and the biggest "downloads_id"  
+        WITH pending_downloads_per_host AS (
+    
+            SELECT
+                host,
+                downloads_id,
+                ROW_NUMBER() OVER(
+                    PARTITION BY host
+                    ORDER BY
+                        priority,
+                        downloads_id DESC NULLS LAST
+                ) AS rank
+            FROM downloads_pending AS dp
+            WHERE (
+                SELECT 1
+                FROM queued_downloads AS qd
+                WHERE qd.downloads_id = dp.downloads_id
+            ) IS NULL
+        )
+        
+        SELECT downloads_id
+        FROM pending_downloads_per_host
+        WHERE rank = 1
+    
+    """).flat()
 
-    log.info("provide downloads host downloads: %d" % len(downloads_ids))
+    log.info(f"Providing {len(downloads_ids)} per-host download IDs")
 
     return downloads_ids
 
 
+def __chunks(list_to_be_chunked: List[Any], chunk_size: int) -> Iterator[List[Any]]:
+    """Yield successive chunks from parameter list."""
+    for i in range(0, len(list_to_be_chunked), chunk_size):
+        yield list_to_be_chunked[i:i + chunk_size]
+
+
 def run_provider(db: DatabaseHandler, daemon: bool = True) -> None:
     """Run the provider daemon to periodically add crawler_fetcher jobs by querying for pending downloads.
 
@@ -189,27 +227,42 @@ def run_provider(db: DatabaseHandler, daemon: bool = True) -> None:
         queue_size = db.query(
             "select count(*) from ( select 1 from queued_downloads limit %(a)s ) q",
             {'a': MAX_QUEUE_SIZE * 10}).flat()[0]
-        log.warning("queue_size: %d" % queue_size)
+        log.info(f"Queue size: {queue_size}")
 
         if queue_size < MAX_QUEUE_SIZE:
             downloads_ids = provide_download_ids(db)
 
             if downloads_ids:
-                log.warning("adding to downloads to queue: %d" % len(downloads_ids))
+                log.info(f"Adding {len(downloads_ids)} download IDs to queue...")
+
+                # Insert in chunks so that:
+                # 1) Fetchers get to fetching sooner;
+                # 2) We don't have to come up with a query that's 2 MB long.
+                for chunk_downloads_ids in __chunks(list_to_be_chunked=downloads_ids, chunk_size=1000):
+                    log.info(f"Inserting chunk of downloads ({len(chunk_downloads_ids)} download IDs)...")
+                    # noinspection SqlResolve,SqlSignature
+                    db.query(
+                        """
+                        INSERT INTO queued_downloads (downloads_id)
+                        VALUES (unnest (ARRAY %(chunk_downloads_ids)s::bigint[]))
+                        ON CONFLICT (downloads_id) DO NOTHING
+                        """ % {
+                            'chunk_downloads_ids': chunk_downloads_ids,
+                        }
+                    )
 
-                values = ','.join(["(%d)" % i for i in downloads_ids])
-                db.query(
-                    "insert into queued_downloads(downloads_id) values %s on conflict (downloads_id) do nothing" %
-                    values)
             else:
-                log.info("No downloads to add")
+                log.info("No download IDs to add")
 
             if daemon:
                 if time.time() - last_queue_time < QUEUE_INTERVAL:
+                    log.info(f"Sleeping for {QUEUE_INTERVAL} seconds")
                     time.sleep(QUEUE_INTERVAL)
 
         elif daemon:
-            time.sleep(QUEUE_INTERVAL * 10)
+            time_to_sleep = QUEUE_INTERVAL * 10
+            log.info(f"Sleeping for {time_to_sleep} seconds as we're running as a daemon")
+            time.sleep(time_to_sleep)
 
         last_queue_time = time.time()
 
diff --git a/apps/crawler-provider/tests/python/test_add_stale_feeds.py b/apps/crawler-provider/tests/python/test_add_stale_feeds.py
index 03b5e7e925..e7944889c8 100644
--- a/apps/crawler-provider/tests/python/test_add_stale_feeds.py
+++ b/apps/crawler-provider/tests/python/test_add_stale_feeds.py
@@ -3,6 +3,8 @@
 from mediawords.test.db.create import create_test_medium
 from mediawords.db import connect_to_db
 from mediawords.util.sql import sql_now, get_sql_date_from_epoch
+
+# noinspection PyProtectedMember
 from crawler_provider import _add_stale_feeds
 
 
@@ -32,7 +34,7 @@ def test_add_stale_feeds():
         'active': True,
         'last_attempted_download_time': sql_now()
     }
-    feed = db.create('feeds', feed)
+    db.create('feeds', feed)
 
     feed = {
         'media_id': medium['media_id'],
@@ -43,7 +45,7 @@ def test_add_stale_feeds():
         'last_attempted_download_time': sql_now(),
         'last_new_story_time': sql_now()
     }
-    feed = db.create('feeds', feed)
+    db.create('feeds', feed)
 
     feed = {
         'media_id': medium['media_id'],
@@ -51,8 +53,8 @@ def test_add_stale_feeds():
         'url': 'http://5 minute new story',
         'type': 'syndicated',
         'active': True,
-        'last_attempted_download_time': get_sql_date_from_epoch(time.time() - 300),
-        'last_new_story_time': get_sql_date_from_epoch(time.time() - 300),
+        'last_attempted_download_time': get_sql_date_from_epoch(int(time.time()) - 300),
+        'last_new_story_time': get_sql_date_from_epoch(int(time.time()) - 300),
     }
     feed = db.create('feeds', feed)
     pending_feeds.append(feed)
@@ -63,7 +65,7 @@ def test_add_stale_feeds():
         'url': 'http://old last download',
         'type': 'syndicated',
         'active': True,
-        'last_attempted_download_time': get_sql_date_from_epoch(time.time() - (86400 * 10))
+        'last_attempted_download_time': get_sql_date_from_epoch(int(time.time()) - (86400 * 10))
     }
     feed = db.create('feeds', feed)
     pending_feeds.append(feed)
diff --git a/apps/crawler-provider/tests/python/test_provide_download_ids.py b/apps/crawler-provider/tests/python/test_provide_download_ids.py
index 6c51e7a2f6..b55af28e6e 100644
--- a/apps/crawler-provider/tests/python/test_provide_download_ids.py
+++ b/apps/crawler-provider/tests/python/test_provide_download_ids.py
@@ -1,5 +1,6 @@
 from mediawords.db import connect_to_db
 from mediawords.test.db.create import create_test_medium, create_test_feed
+
 from crawler_provider import provide_download_ids
 
 
diff --git a/apps/crawler-provider/tests/python/test_run_provider.py b/apps/crawler-provider/tests/python/test_run_provider.py
index 896e468ca7..a456c48862 100644
--- a/apps/crawler-provider/tests/python/test_run_provider.py
+++ b/apps/crawler-provider/tests/python/test_run_provider.py
@@ -1,9 +1,10 @@
 import time
 
-from crawler_provider import run_provider
 from mediawords.test.db.create import create_test_medium, create_test_feed
 from mediawords.db import connect_to_db
 
+from crawler_provider import run_provider
+
 
 def test_run_provider():
     db = connect_to_db()
diff --git a/apps/create-missing-partitions/.idea/create-missing-partitions.iml b/apps/create-missing-partitions/.idea/create-missing-partitions.iml
index 42988ad5e7..3df2c0c662 100644
--- a/apps/create-missing-partitions/.idea/create-missing-partitions.iml
+++ b/apps/create-missing-partitions/.idea/create-missing-partitions.iml
@@ -2,7 +2,7 @@
 <module type="PYTHON_MODULE" version="4">
   <component name="NewModuleRootManager">
     <content url="file://$MODULE_DIR$" />
-    <orderEntry type="jdk" jdkName="Remote Python 3.7.3 Docker Compose (create-missing-partitions at [/Users/pypt/Dropbox/etc-MediaCloud/trunk/apps/create-missing-partitions/docker-compose.tests.yml])" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Remote Python 3.8.5 Docker Compose (create-missing-partitions at [/home/pypt/m/apps/create-missing-partitions/docker-compose.tests.yml])" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="TestRunnerService">
diff --git a/apps/create-missing-partitions/.idea/mediawords.sql b/apps/create-missing-partitions/.idea/mediawords.sql
new file mode 120000
index 0000000000..08fc9a64b9
--- /dev/null
+++ b/apps/create-missing-partitions/.idea/mediawords.sql
@@ -0,0 +1 @@
+../../postgresql-server/schema/mediawords.sql
\ No newline at end of file
diff --git a/apps/create-missing-partitions/.idea/misc.xml b/apps/create-missing-partitions/.idea/misc.xml
index 6a91d1a00f..0fb1cc2fba 100644
--- a/apps/create-missing-partitions/.idea/misc.xml
+++ b/apps/create-missing-partitions/.idea/misc.xml
@@ -6,5 +6,5 @@
   <component name="NodePackageJsonFileManager">
     <packageJsonPaths />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.7.3 Docker Compose (create-missing-partitions at [/Users/pypt/Dropbox/etc-MediaCloud/trunk/apps/create-missing-partitions/docker-compose.tests.yml])" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.8.5 Docker Compose (create-missing-partitions at [/home/pypt/m/apps/create-missing-partitions/docker-compose.tests.yml])" project-jdk-type="Python SDK" />
 </project>
\ No newline at end of file
diff --git a/apps/create-missing-partitions/.idea/sqlDataSources.xml b/apps/create-missing-partitions/.idea/sqlDataSources.xml
new file mode 100644
index 0000000000..9e8bed1c5a
--- /dev/null
+++ b/apps/create-missing-partitions/.idea/sqlDataSources.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="SqlDataSourceStorage">
+    <option name="dataSources">
+      <list>
+        <State>
+          <option name="id" value="0a301c2a-0309-4fc4-a9ef-3b65fe171701" />
+          <option name="name" value="DDL data source" />
+          <option name="urls">
+            <array>
+              <option value="file://$PROJECT_DIR$/.idea/mediawords.sql" />
+            </array>
+          </option>
+        </State>
+      </list>
+    </option>
+  </component>
+</project>
\ No newline at end of file
diff --git a/apps/create-missing-partitions/docker-compose.tests.yml b/apps/create-missing-partitions/docker-compose.tests.yml
index cbafcefd70..a454e2a84b 100644
--- a/apps/create-missing-partitions/docker-compose.tests.yml
+++ b/apps/create-missing-partitions/docker-compose.tests.yml
@@ -43,5 +43,5 @@ services:
               source: ./../postgresql-server/schema/
               target: /opt/mediacloud/schema/
             - type: bind
-              source: ./../postgresql-server/conf/
-              target: /etc/postgresql/11/main/
+              source: ./../postgresql-base/conf/
+              target: /etc/postgresql/13/main/
diff --git a/apps/cron-generate-daily-rss-dumps/.idea/cron-generate-daily-rss-dumps.iml b/apps/cron-generate-daily-rss-dumps/.idea/cron-generate-daily-rss-dumps.iml
index 524923feed..4c3138569c 100644
--- a/apps/cron-generate-daily-rss-dumps/.idea/cron-generate-daily-rss-dumps.iml
+++ b/apps/cron-generate-daily-rss-dumps/.idea/cron-generate-daily-rss-dumps.iml
@@ -2,7 +2,7 @@
 <module type="PYTHON_MODULE" version="4">
   <component name="NewModuleRootManager">
     <content url="file://$MODULE_DIR$" />
-    <orderEntry type="jdk" jdkName="Remote Python 3.7.3 Docker Compose (cron-generate-daily-rss-dumps at [/Users/pypt/Dropbox/etc-MediaCloud/trunk/apps/cron-generate-daily-rss-dumps/docker-compose.tests.yml])" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Remote Python 3.8.5 Docker Compose (cron-generate-daily-rss-dumps at [/home/pypt/m/apps/cron-generate-daily-rss-dumps/docker-compose.tests.yml])" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="TestRunnerService">
diff --git a/apps/cron-generate-daily-rss-dumps/.idea/mediawords.sql b/apps/cron-generate-daily-rss-dumps/.idea/mediawords.sql
new file mode 120000
index 0000000000..08fc9a64b9
--- /dev/null
+++ b/apps/cron-generate-daily-rss-dumps/.idea/mediawords.sql
@@ -0,0 +1 @@
+../../postgresql-server/schema/mediawords.sql
\ No newline at end of file
diff --git a/apps/cron-generate-daily-rss-dumps/.idea/misc.xml b/apps/cron-generate-daily-rss-dumps/.idea/misc.xml
index 5012d8cb66..70d7383712 100644
--- a/apps/cron-generate-daily-rss-dumps/.idea/misc.xml
+++ b/apps/cron-generate-daily-rss-dumps/.idea/misc.xml
@@ -6,5 +6,5 @@
   <component name="NodePackageJsonFileManager">
     <packageJsonPaths />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.7.3 Docker Compose (cron-generate-daily-rss-dumps at [/Users/pypt/Dropbox/etc-MediaCloud/trunk/apps/cron-generate-daily-rss-dumps/docker-compose.tests.yml])" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.8.5 Docker Compose (cron-generate-daily-rss-dumps at [/home/pypt/m/apps/cron-generate-daily-rss-dumps/docker-compose.tests.yml])" project-jdk-type="Python SDK" />
 </project>
\ No newline at end of file
diff --git a/apps/cron-generate-daily-rss-dumps/.idea/sqlDataSources.xml b/apps/cron-generate-daily-rss-dumps/.idea/sqlDataSources.xml
new file mode 100644
index 0000000000..3eacf166ca
--- /dev/null
+++ b/apps/cron-generate-daily-rss-dumps/.idea/sqlDataSources.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="SqlDataSourceStorage">
+    <option name="dataSources">
+      <list>
+        <State>
+          <option name="id" value="eea8e828-933e-482a-8c7e-92505255f2c2" />
+          <option name="name" value="DDL data source" />
+          <option name="urls">
+            <array>
+              <option value="file://$PROJECT_DIR$/.idea/mediawords.sql" />
+            </array>
+          </option>
+        </State>
+      </list>
+    </option>
+  </component>
+</project>
\ No newline at end of file
diff --git a/apps/cron-generate-daily-rss-dumps/docker-compose.tests.yml b/apps/cron-generate-daily-rss-dumps/docker-compose.tests.yml
index 80a2254651..56fdc8b91e 100644
--- a/apps/cron-generate-daily-rss-dumps/docker-compose.tests.yml
+++ b/apps/cron-generate-daily-rss-dumps/docker-compose.tests.yml
@@ -46,5 +46,5 @@ services:
               source: ./../postgresql-server/schema/
               target: /opt/mediacloud/schema/
             - type: bind
-              source: ./../postgresql-server/conf/
-              target: /etc/postgresql/11/main/
+              source: ./../postgresql-base/conf/
+              target: /etc/postgresql/13/main/
diff --git a/apps/cron-generate-media-health/.idea/cron-generate-media-health.iml b/apps/cron-generate-media-health/.idea/cron-generate-media-health.iml
index 60ad626c86..995969f847 100644
--- a/apps/cron-generate-media-health/.idea/cron-generate-media-health.iml
+++ b/apps/cron-generate-media-health/.idea/cron-generate-media-health.iml
@@ -2,7 +2,7 @@
 <module type="PYTHON_MODULE" version="4">
   <component name="NewModuleRootManager">
     <content url="file://$MODULE_DIR$" />
-    <orderEntry type="jdk" jdkName="Remote Python 3.7.3 Docker Compose (cron-generate-media-health at [/Users/pypt/Dropbox/etc-MediaCloud/trunk/apps/cron-generate-media-health/docker-compose.tests.yml])" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Remote Python 3.8.5 Docker Compose (cron-generate-media-health at [/home/pypt/m/apps/cron-generate-media-health/docker-compose.tests.yml])" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="TestRunnerService">
diff --git a/apps/cron-generate-media-health/.idea/mediawords.sql b/apps/cron-generate-media-health/.idea/mediawords.sql
new file mode 120000
index 0000000000..08fc9a64b9
--- /dev/null
+++ b/apps/cron-generate-media-health/.idea/mediawords.sql
@@ -0,0 +1 @@
+../../postgresql-server/schema/mediawords.sql
\ No newline at end of file
diff --git a/apps/cron-generate-media-health/.idea/misc.xml b/apps/cron-generate-media-health/.idea/misc.xml
index 4457122b86..6cccc07d8f 100644
--- a/apps/cron-generate-media-health/.idea/misc.xml
+++ b/apps/cron-generate-media-health/.idea/misc.xml
@@ -6,5 +6,5 @@
   <component name="NodePackageJsonFileManager">
     <packageJsonPaths />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.7.3 Docker Compose (cron-generate-media-health at [/Users/pypt/Dropbox/etc-MediaCloud/trunk/apps/cron-generate-media-health/docker-compose.tests.yml])" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.8.5 Docker Compose (cron-generate-media-health at [/home/pypt/m/apps/cron-generate-media-health/docker-compose.tests.yml])" project-jdk-type="Python SDK" />
 </project>
\ No newline at end of file
diff --git a/apps/cron-generate-media-health/.idea/sqlDataSources.xml b/apps/cron-generate-media-health/.idea/sqlDataSources.xml
new file mode 100644
index 0000000000..57bc924113
--- /dev/null
+++ b/apps/cron-generate-media-health/.idea/sqlDataSources.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="SqlDataSourceStorage">
+    <option name="dataSources">
+      <list>
+        <State>
+          <option name="id" value="8f66c07e-0910-4265-a691-e53a516cd7dd" />
+          <option name="name" value="DDL data source" />
+          <option name="urls">
+            <array>
+              <option value="file://$PROJECT_DIR$/.idea/mediawords.sql" />
+            </array>
+          </option>
+        </State>
+      </list>
+    </option>
+  </component>
+</project>
\ No newline at end of file
diff --git a/apps/cron-generate-media-health/docker-compose.tests.yml b/apps/cron-generate-media-health/docker-compose.tests.yml
index d0e3004326..74b389d3e9 100644
--- a/apps/cron-generate-media-health/docker-compose.tests.yml
+++ b/apps/cron-generate-media-health/docker-compose.tests.yml
@@ -52,5 +52,5 @@ services:
               source: ./../postgresql-server/schema/
               target: /opt/mediacloud/schema/
             - type: bind
-              source: ./../postgresql-server/conf/
-              target: /etc/postgresql/11/main/
+              source: ./../postgresql-base/conf/
+              target: /etc/postgresql/13/main/
diff --git a/apps/cron-generate-user-summary/.idea/cron-generate-user-summary.iml b/apps/cron-generate-user-summary/.idea/cron-generate-user-summary.iml
index 8221ae113c..43e8502909 100644
--- a/apps/cron-generate-user-summary/.idea/cron-generate-user-summary.iml
+++ b/apps/cron-generate-user-summary/.idea/cron-generate-user-summary.iml
@@ -2,7 +2,7 @@
 <module type="PYTHON_MODULE" version="4">
   <component name="NewModuleRootManager">
     <content url="file://$MODULE_DIR$" />
-    <orderEntry type="jdk" jdkName="Remote Python 3.7.3 Docker Compose (cron-generate-user-summary at [/Users/pypt/Dropbox/etc-MediaCloud/trunk/apps/cron-generate-user-summary/docker-compose.tests.yml])" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Remote Python 3.8.5 Docker Compose (cron-generate-user-summary at [/home/pypt/m/apps/cron-generate-user-summary/docker-compose.tests.yml])" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="TestRunnerService">
diff --git a/apps/cron-generate-user-summary/.idea/mediawords.sql b/apps/cron-generate-user-summary/.idea/mediawords.sql
new file mode 120000
index 0000000000..08fc9a64b9
--- /dev/null
+++ b/apps/cron-generate-user-summary/.idea/mediawords.sql
@@ -0,0 +1 @@
+../../postgresql-server/schema/mediawords.sql
\ No newline at end of file
diff --git a/apps/cron-generate-user-summary/.idea/misc.xml b/apps/cron-generate-user-summary/.idea/misc.xml
index 593c83ec4a..c12db7e294 100644
--- a/apps/cron-generate-user-summary/.idea/misc.xml
+++ b/apps/cron-generate-user-summary/.idea/misc.xml
@@ -6,5 +6,5 @@
   <component name="NodePackageJsonFileManager">
     <packageJsonPaths />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.7.3 Docker Compose (cron-generate-user-summary at [/Users/pypt/Dropbox/etc-MediaCloud/trunk/apps/cron-generate-user-summary/docker-compose.tests.yml])" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.8.5 Docker Compose (cron-generate-user-summary at [/home/pypt/m/apps/cron-generate-user-summary/docker-compose.tests.yml])" project-jdk-type="Python SDK" />
 </project>
\ No newline at end of file
diff --git a/apps/cron-generate-user-summary/.idea/sqlDataSources.xml b/apps/cron-generate-user-summary/.idea/sqlDataSources.xml
new file mode 100644
index 0000000000..e8ade52e9c
--- /dev/null
+++ b/apps/cron-generate-user-summary/.idea/sqlDataSources.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="SqlDataSourceStorage">
+    <option name="dataSources">
+      <list>
+        <State>
+          <option name="id" value="061f617c-8e69-406c-9ad6-e8dd05dc9eb6" />
+          <option name="name" value="DDL data source" />
+          <option name="urls">
+            <array>
+              <option value="file://$PROJECT_DIR$/.idea/mediawords.sql" />
+            </array>
+          </option>
+        </State>
+      </list>
+    </option>
+  </component>
+</project>
\ No newline at end of file
diff --git a/apps/cron-generate-user-summary/docker-compose.tests.yml b/apps/cron-generate-user-summary/docker-compose.tests.yml
index c02b92f977..1ac0b72ada 100644
--- a/apps/cron-generate-user-summary/docker-compose.tests.yml
+++ b/apps/cron-generate-user-summary/docker-compose.tests.yml
@@ -46,5 +46,5 @@ services:
               source: ./../postgresql-server/schema/
               target: /opt/mediacloud/schema/
             - type: bind
-              source: ./../postgresql-server/conf/
-              target: /etc/postgresql/11/main/
+              source: ./../postgresql-base/conf/
+              target: /etc/postgresql/13/main/
diff --git a/apps/cron-print-long-running-job-states/.idea/cron-print-long-running-job-states.iml b/apps/cron-print-long-running-job-states/.idea/cron-print-long-running-job-states.iml
index cc772aeb8e..29e6a7ec66 100644
--- a/apps/cron-print-long-running-job-states/.idea/cron-print-long-running-job-states.iml
+++ b/apps/cron-print-long-running-job-states/.idea/cron-print-long-running-job-states.iml
@@ -2,7 +2,7 @@
 <module type="PYTHON_MODULE" version="4">
   <component name="NewModuleRootManager">
     <content url="file://$MODULE_DIR$" />
-    <orderEntry type="jdk" jdkName="Remote Python 3.7.3 Docker Compose (cron-print-long-running-job-states at [/Users/pypt/Dropbox/etc-MediaCloud/trunk/apps/cron-print-long-running-job-states/docker-compose.tests.yml])" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Remote Python 3.8.5 Docker Compose (cron-print-long-running-job-states at [/home/pypt/m/apps/cron-print-long-running-job-states/docker-compose.tests.yml])" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="TestRunnerService">
diff --git a/apps/cron-print-long-running-job-states/.idea/mediawords.sql b/apps/cron-print-long-running-job-states/.idea/mediawords.sql
new file mode 120000
index 0000000000..08fc9a64b9
--- /dev/null
+++ b/apps/cron-print-long-running-job-states/.idea/mediawords.sql
@@ -0,0 +1 @@
+../../postgresql-server/schema/mediawords.sql
\ No newline at end of file
diff --git a/apps/cron-print-long-running-job-states/.idea/misc.xml b/apps/cron-print-long-running-job-states/.idea/misc.xml
index c036f5aa0f..f9bdf2011c 100644
--- a/apps/cron-print-long-running-job-states/.idea/misc.xml
+++ b/apps/cron-print-long-running-job-states/.idea/misc.xml
@@ -6,5 +6,5 @@
   <component name="NodePackageJsonFileManager">
     <packageJsonPaths />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.7.3 Docker Compose (cron-print-long-running-job-states at [/Users/pypt/Dropbox/etc-MediaCloud/trunk/apps/cron-print-long-running-job-states/docker-compose.tests.yml])" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.8.5 Docker Compose (cron-print-long-running-job-states at [/home/pypt/m/apps/cron-print-long-running-job-states/docker-compose.tests.yml])" project-jdk-type="Python SDK" />
 </project>
\ No newline at end of file
diff --git a/apps/cron-print-long-running-job-states/.idea/sqlDataSources.xml b/apps/cron-print-long-running-job-states/.idea/sqlDataSources.xml
new file mode 100644
index 0000000000..34938f86d8
--- /dev/null
+++ b/apps/cron-print-long-running-job-states/.idea/sqlDataSources.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="SqlDataSourceStorage">
+    <option name="dataSources">
+      <list>
+        <State>
+          <option name="id" value="05a29f4e-2857-4587-a5bb-a26573604c55" />
+          <option name="name" value="DDL data source" />
+          <option name="urls">
+            <array>
+              <option value="file://$PROJECT_DIR$/.idea/mediawords.sql" />
+            </array>
+          </option>
+        </State>
+      </list>
+    </option>
+  </component>
+</project>
\ No newline at end of file
diff --git a/apps/cron-print-long-running-job-states/docker-compose.tests.yml b/apps/cron-print-long-running-job-states/docker-compose.tests.yml
index f84022e5b3..f2eb667e72 100644
--- a/apps/cron-print-long-running-job-states/docker-compose.tests.yml
+++ b/apps/cron-print-long-running-job-states/docker-compose.tests.yml
@@ -46,5 +46,5 @@ services:
               source: ./../postgresql-server/schema/
               target: /opt/mediacloud/schema/
             - type: bind
-              source: ./../postgresql-server/conf/
-              target: /etc/postgresql/11/main/
+              source: ./../postgresql-base/conf/
+              target: /etc/postgresql/13/main/
diff --git a/apps/cron-refresh-stats/.idea/cron-refresh-stats.iml b/apps/cron-refresh-stats/.idea/cron-refresh-stats.iml
index 1653c18a80..1d45b72bf1 100644
--- a/apps/cron-refresh-stats/.idea/cron-refresh-stats.iml
+++ b/apps/cron-refresh-stats/.idea/cron-refresh-stats.iml
@@ -2,7 +2,7 @@
 <module type="PYTHON_MODULE" version="4">
   <component name="NewModuleRootManager">
     <content url="file://$MODULE_DIR$" />
-    <orderEntry type="jdk" jdkName="Remote Python 3.7.3 Docker Compose (cron-refresh-stats at [/Users/pypt/Dropbox/etc-MediaCloud/trunk/apps/cron-refresh-stats/docker-compose.tests.yml])" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Remote Python 3.8.5 Docker Compose (cron-refresh-stats at [/home/pypt/m/apps/cron-refresh-stats/docker-compose.tests.yml])" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="TestRunnerService">
diff --git a/apps/cron-refresh-stats/.idea/mediawords.sql b/apps/cron-refresh-stats/.idea/mediawords.sql
new file mode 120000
index 0000000000..08fc9a64b9
--- /dev/null
+++ b/apps/cron-refresh-stats/.idea/mediawords.sql
@@ -0,0 +1 @@
+../../postgresql-server/schema/mediawords.sql
\ No newline at end of file
diff --git a/apps/cron-refresh-stats/.idea/misc.xml b/apps/cron-refresh-stats/.idea/misc.xml
index 169914fe12..993cab23e3 100644
--- a/apps/cron-refresh-stats/.idea/misc.xml
+++ b/apps/cron-refresh-stats/.idea/misc.xml
@@ -6,5 +6,5 @@
   <component name="NodePackageJsonFileManager">
     <packageJsonPaths />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.7.3 Docker Compose (cron-refresh-stats at [/Users/pypt/Dropbox/etc-MediaCloud/trunk/apps/cron-refresh-stats/docker-compose.tests.yml])" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.8.5 Docker Compose (cron-refresh-stats at [/home/pypt/m/apps/cron-refresh-stats/docker-compose.tests.yml])" project-jdk-type="Python SDK" />
 </project>
\ No newline at end of file
diff --git a/apps/cron-refresh-stats/.idea/sqlDataSources.xml b/apps/cron-refresh-stats/.idea/sqlDataSources.xml
new file mode 100644
index 0000000000..bf483f8df2
--- /dev/null
+++ b/apps/cron-refresh-stats/.idea/sqlDataSources.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="SqlDataSourceStorage">
+    <option name="dataSources">
+      <list>
+        <State>
+          <option name="id" value="6b7397d5-14c8-44b6-b501-01b1ff1f3c8a" />
+          <option name="name" value="DDL data source" />
+          <option name="urls">
+            <array>
+              <option value="file://$PROJECT_DIR$/.idea/mediawords.sql" />
+            </array>
+          </option>
+        </State>
+      </list>
+    </option>
+  </component>
+</project>
\ No newline at end of file
diff --git a/apps/cron-refresh-stats/docker-compose.tests.yml b/apps/cron-refresh-stats/docker-compose.tests.yml
index 2ccefa0ed7..9af8e30be6 100644
--- a/apps/cron-refresh-stats/docker-compose.tests.yml
+++ b/apps/cron-refresh-stats/docker-compose.tests.yml
@@ -46,5 +46,5 @@ services:
               source: ./../postgresql-server/schema/
               target: /opt/mediacloud/schema/
             - type: bind
-              source: ./../postgresql-server/conf/
-              target: /etc/postgresql/11/main/
+              source: ./../postgresql-base/conf/
+              target: /etc/postgresql/13/main/
diff --git a/apps/cron-rescrape-due-media/.idea/cron-rescrape-due-media.iml b/apps/cron-rescrape-due-media/.idea/cron-rescrape-due-media.iml
index 1ef711ee68..0a5eca061e 100644
--- a/apps/cron-rescrape-due-media/.idea/cron-rescrape-due-media.iml
+++ b/apps/cron-rescrape-due-media/.idea/cron-rescrape-due-media.iml
@@ -2,7 +2,7 @@
 <module type="PYTHON_MODULE" version="4">
   <component name="NewModuleRootManager">
     <content url="file://$MODULE_DIR$" />
-    <orderEntry type="jdk" jdkName="Remote Python 3.7.3 Docker Compose (cron-rescrape-due-media at [/Users/pypt/Dropbox/etc-MediaCloud/trunk/apps/cron-rescrape-due-media/docker-compose.tests.yml])" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Remote Python 3.8.5 Docker Compose (cron-rescrape-due-media at [/home/pypt/m/apps/cron-rescrape-due-media/docker-compose.tests.yml])" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="TestRunnerService">
diff --git a/apps/cron-rescrape-due-media/.idea/mediawords.sql b/apps/cron-rescrape-due-media/.idea/mediawords.sql
new file mode 120000
index 0000000000..08fc9a64b9
--- /dev/null
+++ b/apps/cron-rescrape-due-media/.idea/mediawords.sql
@@ -0,0 +1 @@
+../../postgresql-server/schema/mediawords.sql
\ No newline at end of file
diff --git a/apps/cron-rescrape-due-media/.idea/misc.xml b/apps/cron-rescrape-due-media/.idea/misc.xml
index 2f005776c1..652440f052 100644
--- a/apps/cron-rescrape-due-media/.idea/misc.xml
+++ b/apps/cron-rescrape-due-media/.idea/misc.xml
@@ -6,5 +6,5 @@
   <component name="NodePackageJsonFileManager">
     <packageJsonPaths />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.7.3 Docker Compose (cron-rescrape-due-media at [/Users/pypt/Dropbox/etc-MediaCloud/trunk/apps/cron-rescrape-due-media/docker-compose.tests.yml])" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.8.5 Docker Compose (cron-rescrape-due-media at [/home/pypt/m/apps/cron-rescrape-due-media/docker-compose.tests.yml])" project-jdk-type="Python SDK" />
 </project>
\ No newline at end of file
diff --git a/apps/cron-rescrape-due-media/.idea/sqlDataSources.xml b/apps/cron-rescrape-due-media/.idea/sqlDataSources.xml
new file mode 100644
index 0000000000..401967c19c
--- /dev/null
+++ b/apps/cron-rescrape-due-media/.idea/sqlDataSources.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="SqlDataSourceStorage">
+    <option name="dataSources">
+      <list>
+        <State>
+          <option name="id" value="9a6f7394-2b73-41cb-820b-ff133d3c94ae" />
+          <option name="name" value="DDL data source" />
+          <option name="urls">
+            <array>
+              <option value="file://$PROJECT_DIR$/.idea/mediawords.sql" />
+            </array>
+          </option>
+        </State>
+      </list>
+    </option>
+  </component>
+</project>
\ No newline at end of file
diff --git a/apps/cron-rescrape-due-media/docker-compose.tests.yml b/apps/cron-rescrape-due-media/docker-compose.tests.yml
index 9284439ac1..1b321172a7 100644
--- a/apps/cron-rescrape-due-media/docker-compose.tests.yml
+++ b/apps/cron-rescrape-due-media/docker-compose.tests.yml
@@ -47,8 +47,8 @@ services:
               source: ./../postgresql-server/schema/
               target: /opt/mediacloud/schema/
             - type: bind
-              source: ./../postgresql-server/conf/
-              target: /etc/postgresql/11/main/
+              source: ./../postgresql-base/conf/
+              target: /etc/postgresql/13/main/
 
     rabbitmq-server:
         image: gcr.io/mcback/rabbitmq-server:latest
diff --git a/apps/cron-rescraping-changes/.idea/cron-rescraping-changes.iml b/apps/cron-rescraping-changes/.idea/cron-rescraping-changes.iml
index 9c4fdfff81..1a8d59bdb4 100644
--- a/apps/cron-rescraping-changes/.idea/cron-rescraping-changes.iml
+++ b/apps/cron-rescraping-changes/.idea/cron-rescraping-changes.iml
@@ -2,7 +2,7 @@
 <module type="PYTHON_MODULE" version="4">
   <component name="NewModuleRootManager">
     <content url="file://$MODULE_DIR$" />
-    <orderEntry type="jdk" jdkName="Remote Python 3.7.3 Docker Compose (cron-rescraping-changes at [/Users/pypt/Dropbox/etc-MediaCloud/trunk/apps/cron-rescraping-changes/docker-compose.tests.yml])" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Remote Python 3.8.5 Docker Compose (cron-rescraping-changes at [/home/pypt/m/apps/cron-rescraping-changes/docker-compose.tests.yml])" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="TestRunnerService">
diff --git a/apps/cron-rescraping-changes/.idea/mediawords.sql b/apps/cron-rescraping-changes/.idea/mediawords.sql
new file mode 120000
index 0000000000..08fc9a64b9
--- /dev/null
+++ b/apps/cron-rescraping-changes/.idea/mediawords.sql
@@ -0,0 +1 @@
+../../postgresql-server/schema/mediawords.sql
\ No newline at end of file
diff --git a/apps/cron-rescraping-changes/.idea/misc.xml b/apps/cron-rescraping-changes/.idea/misc.xml
index 1b8e228190..08275c44df 100644
--- a/apps/cron-rescraping-changes/.idea/misc.xml
+++ b/apps/cron-rescraping-changes/.idea/misc.xml
@@ -6,5 +6,5 @@
   <component name="NodePackageJsonFileManager">
     <packageJsonPaths />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.7.3 Docker Compose (cron-rescraping-changes at [/Users/pypt/Dropbox/etc-MediaCloud/trunk/apps/cron-rescraping-changes/docker-compose.tests.yml])" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.8.5 Docker Compose (cron-rescraping-changes at [/home/pypt/m/apps/cron-rescraping-changes/docker-compose.tests.yml])" project-jdk-type="Python SDK" />
 </project>
\ No newline at end of file
diff --git a/apps/cron-rescraping-changes/.idea/sqlDataSources.xml b/apps/cron-rescraping-changes/.idea/sqlDataSources.xml
new file mode 100644
index 0000000000..4c57723ab0
--- /dev/null
+++ b/apps/cron-rescraping-changes/.idea/sqlDataSources.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="SqlDataSourceStorage">
+    <option name="dataSources">
+      <list>
+        <State>
+          <option name="id" value="e0e9ef67-d563-4d19-b5ba-47188d30dac4" />
+          <option name="name" value="DDL data source" />
+          <option name="urls">
+            <array>
+              <option value="file://$PROJECT_DIR$/.idea/mediawords.sql" />
+            </array>
+          </option>
+        </State>
+      </list>
+    </option>
+  </component>
+</project>
\ No newline at end of file
diff --git a/apps/cron-rescraping-changes/docker-compose.tests.yml b/apps/cron-rescraping-changes/docker-compose.tests.yml
index 6437093c02..a563decac7 100644
--- a/apps/cron-rescraping-changes/docker-compose.tests.yml
+++ b/apps/cron-rescraping-changes/docker-compose.tests.yml
@@ -46,5 +46,5 @@ services:
               source: ./../postgresql-server/schema/
               target: /opt/mediacloud/schema/
             - type: bind
-              source: ./../postgresql-server/conf/
-              target: /etc/postgresql/11/main/
+              source: ./../postgresql-base/conf/
+              target: /etc/postgresql/13/main/
diff --git a/apps/cron-set-media-primary-language/.idea/cron-set-media-primary-language.iml b/apps/cron-set-media-primary-language/.idea/cron-set-media-primary-language.iml
index f310743f07..0e114839d4 100644
--- a/apps/cron-set-media-primary-language/.idea/cron-set-media-primary-language.iml
+++ b/apps/cron-set-media-primary-language/.idea/cron-set-media-primary-language.iml
@@ -2,7 +2,7 @@
 <module type="PYTHON_MODULE" version="4">
   <component name="NewModuleRootManager">
     <content url="file://$MODULE_DIR$" />
-    <orderEntry type="jdk" jdkName="Remote Python 3.7.3 Docker Compose (cron-set-media-primary-language at [/Users/pypt/Dropbox/etc-MediaCloud/trunk/apps/cron-set-media-primary-language/docker-compose.tests.yml])" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Remote Python 3.8.5 Docker Compose (cron-set-media-primary-language at [/home/pypt/m/apps/cron-set-media-primary-language/docker-compose.tests.yml])" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="TestRunnerService">
diff --git a/apps/cron-set-media-primary-language/.idea/mediawords.sql b/apps/cron-set-media-primary-language/.idea/mediawords.sql
new file mode 120000
index 0000000000..08fc9a64b9
--- /dev/null
+++ b/apps/cron-set-media-primary-language/.idea/mediawords.sql
@@ -0,0 +1 @@
+../../postgresql-server/schema/mediawords.sql
\ No newline at end of file
diff --git a/apps/cron-set-media-primary-language/.idea/misc.xml b/apps/cron-set-media-primary-language/.idea/misc.xml
index 8876443b65..23250328bb 100644
--- a/apps/cron-set-media-primary-language/.idea/misc.xml
+++ b/apps/cron-set-media-primary-language/.idea/misc.xml
@@ -6,5 +6,5 @@
   <component name="NodePackageJsonFileManager">
     <packageJsonPaths />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.7.3 Docker Compose (cron-set-media-primary-language at [/Users/pypt/Dropbox/etc-MediaCloud/trunk/apps/cron-set-media-primary-language/docker-compose.tests.yml])" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.8.5 Docker Compose (cron-set-media-primary-language at [/home/pypt/m/apps/cron-set-media-primary-language/docker-compose.tests.yml])" project-jdk-type="Python SDK" />
 </project>
\ No newline at end of file
diff --git a/apps/cron-set-media-primary-language/.idea/sqlDataSources.xml b/apps/cron-set-media-primary-language/.idea/sqlDataSources.xml
new file mode 100644
index 0000000000..ca1eda3677
--- /dev/null
+++ b/apps/cron-set-media-primary-language/.idea/sqlDataSources.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="SqlDataSourceStorage">
+    <option name="dataSources">
+      <list>
+        <State>
+          <option name="id" value="fc99b94c-b7c2-412b-9469-574312303036" />
+          <option name="name" value="DDL data source" />
+          <option name="urls">
+            <array>
+              <option value="file://$PROJECT_DIR$/.idea/mediawords.sql" />
+            </array>
+          </option>
+        </State>
+      </list>
+    </option>
+  </component>
+</project>
\ No newline at end of file
diff --git a/apps/cron-set-media-primary-language/docker-compose.tests.yml b/apps/cron-set-media-primary-language/docker-compose.tests.yml
index 69c7cd43a6..f4e0ca5bf7 100644
--- a/apps/cron-set-media-primary-language/docker-compose.tests.yml
+++ b/apps/cron-set-media-primary-language/docker-compose.tests.yml
@@ -52,5 +52,5 @@ services:
               source: ./../postgresql-server/schema/
               target: /opt/mediacloud/schema/
             - type: bind
-              source: ./../postgresql-server/conf/
-              target: /etc/postgresql/11/main/
+              source: ./../postgresql-base/conf/
+              target: /etc/postgresql/13/main/
diff --git a/apps/cron-set-media-subject-country/.idea/cron-set-media-subject-country.iml b/apps/cron-set-media-subject-country/.idea/cron-set-media-subject-country.iml
index 035f4e65b2..c714455c7d 100644
--- a/apps/cron-set-media-subject-country/.idea/cron-set-media-subject-country.iml
+++ b/apps/cron-set-media-subject-country/.idea/cron-set-media-subject-country.iml
@@ -2,7 +2,7 @@
 <module type="PYTHON_MODULE" version="4">
   <component name="NewModuleRootManager">
     <content url="file://$MODULE_DIR$" />
-    <orderEntry type="jdk" jdkName="Remote Python 3.7.3 Docker Compose (cron-set-media-subject-country at [/Users/pypt/Dropbox/etc-MediaCloud/trunk/apps/cron-set-media-subject-country/docker-compose.tests.yml])" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Remote Python 3.8.5 Docker Compose (cron-set-media-subject-country at [/home/pypt/m/apps/cron-set-media-subject-country/docker-compose.tests.yml])" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="TestRunnerService">
diff --git a/apps/cron-set-media-subject-country/.idea/mediawords.sql b/apps/cron-set-media-subject-country/.idea/mediawords.sql
new file mode 120000
index 0000000000..08fc9a64b9
--- /dev/null
+++ b/apps/cron-set-media-subject-country/.idea/mediawords.sql
@@ -0,0 +1 @@
+../../postgresql-server/schema/mediawords.sql
\ No newline at end of file
diff --git a/apps/cron-set-media-subject-country/.idea/misc.xml b/apps/cron-set-media-subject-country/.idea/misc.xml
index 4b93bd35c3..2541630658 100644
--- a/apps/cron-set-media-subject-country/.idea/misc.xml
+++ b/apps/cron-set-media-subject-country/.idea/misc.xml
@@ -6,5 +6,5 @@
   <component name="NodePackageJsonFileManager">
     <packageJsonPaths />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.7.3 Docker Compose (cron-set-media-subject-country at [/Users/pypt/Dropbox/etc-MediaCloud/trunk/apps/cron-set-media-subject-country/docker-compose.tests.yml])" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.8.5 Docker Compose (cron-set-media-subject-country at [/home/pypt/m/apps/cron-set-media-subject-country/docker-compose.tests.yml])" project-jdk-type="Python SDK" />
 </project>
\ No newline at end of file
diff --git a/apps/cron-set-media-subject-country/.idea/sqlDataSources.xml b/apps/cron-set-media-subject-country/.idea/sqlDataSources.xml
new file mode 100644
index 0000000000..d57cbe6661
--- /dev/null
+++ b/apps/cron-set-media-subject-country/.idea/sqlDataSources.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="SqlDataSourceStorage">
+    <option name="dataSources">
+      <list>
+        <State>
+          <option name="id" value="ff0e2bf1-a29f-4739-bd42-6abc50a62ba7" />
+          <option name="name" value="DDL data source" />
+          <option name="urls">
+            <array>
+              <option value="file://$PROJECT_DIR$/.idea/mediawords.sql" />
+            </array>
+          </option>
+        </State>
+      </list>
+    </option>
+  </component>
+</project>
\ No newline at end of file
diff --git a/apps/cron-set-media-subject-country/docker-compose.tests.yml b/apps/cron-set-media-subject-country/docker-compose.tests.yml
index 99c543ee36..01864f5a44 100644
--- a/apps/cron-set-media-subject-country/docker-compose.tests.yml
+++ b/apps/cron-set-media-subject-country/docker-compose.tests.yml
@@ -52,5 +52,5 @@ services:
               source: ./../postgresql-server/schema/
               target: /opt/mediacloud/schema/
             - type: bind
-              source: ./../postgresql-server/conf/
-              target: /etc/postgresql/11/main/
+              source: ./../postgresql-base/conf/
+              target: /etc/postgresql/13/main/
diff --git a/apps/docker-compose.dist.yml b/apps/docker-compose.dist.yml
index 42d0e234f0..c015ab9c87 100644
--- a/apps/docker-compose.dist.yml
+++ b/apps/docker-compose.dist.yml
@@ -74,6 +74,14 @@ x-common-configuration: &common-configuration
     # "From:" email address when sending emails
     MC_EMAIL_FROM_ADDRESS: "info@mediacloud.org"
 
+    # Email address to point to in the "List-Unsubscribe" email header.
+    # Technically we don't have a straightforward "unsubscribe" endpoint, but our
+    # emails are more likely to be marked as spam if we don't have such a header,
+    # so we set the email subject to "Delete account and unsubscribe" in
+    # mediawords/util/config/common.py.
+    # Example value: "support@example.com"
+    MC_EMAIL_UNSUBSCRIBE: "support@example.com"
+
     # Fail all HTTP requests that match the following pattern, e.g.
     # "^https?://[^/]*some-website.com"
     MC_USERAGENT_BLACKLIST_URL_PATTERN: ""
@@ -163,37 +171,6 @@ x-brandwatch-api-configuration: &brandwatch-api-configuration
     MC_BRANDWATCH_PASSWORD: ""
 
 
-#
-# Google Cloud for podcast transcription common configuration
-# ===========================================================
-#
-x-podcast-google-cloud-configuration:    &podcast-google-cloud-configuration
-
-    # Base64-encoded Google Cloud authentication JSON file for a service account that
-    # uploads episodes to Google Cloud Storage and submits Speech API jobs; refer to
-    # doc/podcasts_gc_auth.markdown for instructions on how to create such an
-    # account.
-    #
-    # How to generate Base64 encoded credentials:
-    #
-    #     $ base64 mediacloud-service-account-credentials.json
-    #
-    MC_PODCAST_GC_AUTH_JSON_BASE64: '
-        ewogICAgInR5cGUiOiAic2VydmljZV9hY2NvdW50IiwKICAgICJwcm9qZWN0X2lkIjogImV
-        4YW1wbGUiLAogICAgInByaXZhdGVfa2V5X2lkIjogIjdmMTY5YTIxZDNmODA5NzQzNjRiY2
-        YwOWYyMDQ3ZWEwZWZiNTY4M2EiLAogICAgInByaXZhdGVfa2V5IjogIi0tLS0tQkVHSU4gU
-        FJJVkFURSBLRVktLS0tLVxuPC4uLj5cbi0tLS0tRU5EIFBSSVZBVEUgS0VZLS0tLS1cbiIs
-        CiAgICAiY2xpZW50X2VtYWlsIjogImV4YW1wbGVAZXhhbXBsZS5pYW0uZ3NlcnZpY2VhY2N
-        vdW50LmNvbSIsCiAgICAiY2xpZW50X2lkIjogIjEyMyIsCiAgICAiYXV0aF91cmkiOiAiaH
-        R0cHM6Ly9hY2NvdW50cy5nb29nbGUuY29tL28vb2F1dGgyL2F1dGgiLAogICAgInRva2VuX
-        3VyaSI6ICJodHRwczovL29hdXRoMi5nb29nbGVhcGlzLmNvbS90b2tlbiIsCiAgICAiYXV0
-        aF9wcm92aWRlcl94NTA5X2NlcnRfdXJsIjogImh0dHBzOi8vd3d3Lmdvb2dsZWFwaXMuY29
-        tL29hdXRoMi92MS9jZXJ0cyIsCiAgICAiY2xpZW50X3g1MDlfY2VydF91cmwiOiAiaHR0cH
-        M6Ly93d3cuZ29vZ2xlYXBpcy5jb20vcm9ib3QvdjEvbWV0YWRhdGEveDUwOS9leGFtcGxlJ
-        TQwZXhhbXBsZS5pYW0uZ3NlcnZpY2VhY2NvdW50LmNvbSIKfQ==
-    '
-
-
 #
 # Solr shard base service
 # =======================
@@ -1052,6 +1029,8 @@ services:
         depends_on:
             # Reads data from Munin node
             - munin-node
+        environment:
+            MC_MUNIN_CRON_ALERT_EMAIL: "FIXME@mediacloud.org"
         volumes:
             # Shared with "munin-fastcgi-graph" container:
             - vol_munin_data:/var/lib/munin/
@@ -1218,7 +1197,7 @@ services:
     # NYTLabels fetch annotation and tag
     # -----------------------
     #
-    nytlabels-update-story-tags:
+    nytlabels-fetch-annotation-and-tag:
         image: gcr.io/mcback/nytlabels-fetch-annotation-and-tag:release
         init: true
         networks:
@@ -1246,24 +1225,21 @@ services:
                     memory: "512M"
 
     #
-    # Fetch story podcast episode and store it in GCS
-    # -----------------------------------------------
+    # Fetch story podcast episode and store it in GCS (RabbitMQ worker)
+    # -----------------------------------------------------------------
     #
-    podcast-fetch-episode:
-        image: gcr.io/mcback/podcast-fetch-episode:release
+    podcast-transcribe-episode-rabbitmq-worker:
+        image: gcr.io/mcback/podcast-transcribe-episode:release
+        command: "rabbitmq-worker.py"
         init: true
         networks:
             - default
         environment:
             <<: *common-configuration
-            <<: *podcast-google-cloud-configuration
-            # Google Cloud Storage bucket name for storing episode audio files
-            MC_PODCAST_FETCH_EPISODE_BUCKET_NAME: "mediacloud-story-podcast-episodes"
-            # Google Cloud Storage path prefix for storing episode audio files
-            MC_PODCAST_FETCH_EPISODE_PATH_PREFIX: "episodes"
         depends_on:
             - postgresql-pgbouncer
             - rabbitmq-server
+            - temporal-server
         deploy:
             <<: *misc-apps_deploy_placement_constraints
             <<: *endpoint-mode-dnsrr
@@ -1282,80 +1258,53 @@ services:
                 max_attempts: 3
 
     #
-    # Fetch finished transcripts and store them locally
-    # -------------------------------------------------
-    podcast-fetch-transcript:
-        image: gcr.io/mcback/podcast-fetch-transcript:release
-        init: true
-        networks:
-            - default
-        environment:
-            <<: *common-configuration
-            <<: *podcast-google-cloud-configuration
-        depends_on:
-            - postgresql-pgbouncer
-            - rabbitmq-server
-        deploy:
-            <<: *misc-apps_deploy_placement_constraints
-            <<: *endpoint-mode-dnsrr
-            # Worker count
-            replicas: 1
-            resources:
-                limits:
-                    # CPU core limit
-                    cpus: "1"
-                    # RAM limit
-                    memory: "256M"
-            restart_policy:
-                # Automatically restart on non-zero exit codes only instead of on any exit
-                condition: on-failure
-                # Autorestart up to three times
-                max_attempts: 3
-
-    #
-    # Poll due operations and submit them to "podcast-fetch-transcript"
+    # Fetch story podcast episode and store it in GCS (Temporal worker)
     # -----------------------------------------------------------------
-    podcast-poll-due-operations:
-        image: gcr.io/mcback/podcast-poll-due-operations:release
-        init: true
-        networks:
-            - default
-        environment:
-            <<: *common-configuration
-        depends_on:
-            - postgresql-pgbouncer
-            - rabbitmq-server
-        deploy:
-            <<: *misc-apps_deploy_placement_constraints
-            <<: *endpoint-mode-dnsrr
-            # Worker count
-            replicas: 1
-            resources:
-                limits:
-                    # CPU core limit
-                    cpus: "1"
-                    # RAM limit
-                    memory: "256M"
-            restart_policy:
-                # Automatically restart on non-zero exit codes only instead of on any exit
-                condition: on-failure
-                # Autorestart up to three times
-                max_attempts: 3
-
     #
-    # Submit a Speech API operation for a podcast episode
-    # ---------------------------------------------------
-    podcast-submit-operation:
-        image: gcr.io/mcback/podcast-submit-operation:release
+    podcast-transcribe-episode-temporal-worker:
+        image: gcr.io/mcback/podcast-transcribe-episode:release
+        command: "temporal-worker.py"
         init: true
         networks:
             - default
         environment:
             <<: *common-configuration
-            <<: *podcast-google-cloud-configuration
+            # GCS bucket name and path prefix for storing raw, untranscoded enclosure files
+            MC_PODCAST_RAW_ENCLOSURES_BUCKET_NAME: "FIXME"
+            MC_PODCAST_RAW_ENCLOSURES_PATH_PREFIX: "enclosures"
+            # GCS bucket name and path prefix for storing transcoded episodes
+            MC_PODCAST_TRANSCODED_EPISODES_BUCKET_NAME: "FIXME"
+            MC_PODCAST_TRANSCODED_EPISODES_PATH_PREFIX: "episodes"
+            # GCS bucket name and path prefix for storing raw JSON transcripts
+            MC_PODCAST_TRANSCRIPTS_BUCKET_NAME: "FIXME"
+            MC_PODCAST_TRANSCRIPTS_PATH_PREFIX: "transcripts"
+            # Base64-encoded Google Cloud authentication JSON file for a service account that
+            # uploads episodes to Google Cloud Storage and submits Speech API jobs; refer to
+            # doc/podcasts_gc_auth.markdown for instructions on how to create such an
+            # account.
+            #
+            # How to generate Base64 encoded credentials:
+            #
+            #     $ base64 mediacloud-service-account-credentials.json
+            #
+            MC_PODCAST_AUTH_JSON_BASE64: '
+                ewogICAgInR5cGUiOiAic2VydmljZV9hY2NvdW50IiwKICAgICJwcm9qZWN0X2lkIjogImV
+                4YW1wbGUiLAogICAgInByaXZhdGVfa2V5X2lkIjogIjdmMTY5YTIxZDNmODA5NzQzNjRiY2
+                YwOWYyMDQ3ZWEwZWZiNTY4M2EiLAogICAgInByaXZhdGVfa2V5IjogIi0tLS0tQkVHSU4gU
+                FJJVkFURSBLRVktLS0tLVxuPC4uLj5cbi0tLS0tRU5EIFBSSVZBVEUgS0VZLS0tLS1cbiIs
+                CiAgICAiY2xpZW50X2VtYWlsIjogImV4YW1wbGVAZXhhbXBsZS5pYW0uZ3NlcnZpY2VhY2N
+                vdW50LmNvbSIsCiAgICAiY2xpZW50X2lkIjogIjEyMyIsCiAgICAiYXV0aF91cmkiOiAiaH
+                R0cHM6Ly9hY2NvdW50cy5nb29nbGUuY29tL28vb2F1dGgyL2F1dGgiLAogICAgInRva2VuX
+                3VyaSI6ICJodHRwczovL29hdXRoMi5nb29nbGVhcGlzLmNvbS90b2tlbiIsCiAgICAiYXV0
+                aF9wcm92aWRlcl94NTA5X2NlcnRfdXJsIjogImh0dHBzOi8vd3d3Lmdvb2dsZWFwaXMuY29
+                tL29hdXRoMi92MS9jZXJ0cyIsCiAgICAiY2xpZW50X3g1MDlfY2VydF91cmwiOiAiaHR0cH
+                M6Ly93d3cuZ29vZ2xlYXBpcy5jb20vcm9ib3QvdjEvbWV0YWRhdGEveDUwOS9leGFtcGxlJ
+                TQwZXhhbXBsZS5pYW0uZ3NlcnZpY2VhY2NvdW50LmNvbSIKfQ==
+            '
         depends_on:
             - postgresql-pgbouncer
             - rabbitmq-server
+            - temporal-server
         deploy:
             <<: *misc-apps_deploy_placement_constraints
             <<: *endpoint-mode-dnsrr
@@ -1364,9 +1313,9 @@ services:
             resources:
                 limits:
                     # CPU core limit
-                    cpus: "1"
+                    cpus: "2"
                     # RAM limit
-                    memory: "256M"
+                    memory: "4G"
             restart_policy:
                 # Automatically restart on non-zero exit codes only instead of on any exit
                 condition: on-failure
@@ -1834,6 +1783,187 @@ services:
                     # RAM limit
                     memory: "1G"
 
+    #
+    # Temporal Elasticsearch (searching for workflows)
+    # ------------------------------------------------
+    #
+    temporal-elasticsearch:
+        image: gcr.io/mcback/temporal-elasticsearch:release
+        init: true
+        networks:
+            - default
+        expose:
+            - "9200"
+            - "9300"
+        volumes:
+            - vol_temporal_elasticsearch_data:/var/lib/elasticsearch/
+        deploy:
+            <<: *endpoint-mode-dnsrr
+            placement:
+                constraints:
+                    # Must run on the host with Temporal Elasticsearch data volume
+                    - node.labels.role-temporal-elasticsearch == true
+            # Worker count
+            replicas: 1
+            resources:
+                limits:
+                    # CPU core limit
+                    cpus: "4"
+                    # RAM limit
+                    memory: "16G"
+
+    #
+    # Temporal Grafana (web UI for Temporal's stats)
+    # ----------------------------------------------
+    #
+    temporal-grafana:
+        image: gcr.io/mcback/temporal-grafana:release
+        init: true
+        networks:
+            - default
+        expose:
+            - "3000"
+        ports:
+            # For connecting to it through an SSH tunnel
+            # MAKE SURE to BLOCK THIS PORT in provision/roles/docker/tasks/iptables.yml
+            - "3000:3000"
+        volumes:
+            - vol_temporal_grafana_data:/var/lib/grafana/
+        deploy:
+            <<: *endpoint-mode-dnsrr
+            placement:
+                constraints:
+                    # Must run on the host with Temporal Grafana data volume
+                    - node.labels.role-temporal-grafana == true
+            # Worker count
+            replicas: 1
+            resources:
+                limits:
+                    cpus: "2"
+                    memory: "2G"
+
+    #
+    # Temporal PostgreSQL (Temporal's main data store)
+    # ------------------------------------------------
+    #
+    temporal-postgresql:
+        image: gcr.io/mcback/temporal-postgresql:release
+        init: true
+        networks:
+            - default
+        expose:
+            - "5432"
+        volumes:
+            - vol_temporal_postgresql_data:/var/lib/postgresql/
+        deploy:
+            <<: *endpoint-mode-dnsrr
+            placement:
+                constraints:
+                    # Must run on the host with Temporal PostgreSQL server data volume
+                    - node.labels.role-temporal-postgresql == true
+            # Worker count
+            replicas: 1
+            resources:
+                limits:
+                    # CPU core limit
+                    cpus: "8"
+                    # RAM limit
+                    memory: "32G"
+
+    #
+    # Temporal Prometheus (Temporal's statistics store)
+    # -------------------------------------------------
+    #
+    temporal-prometheus:
+        image: gcr.io/mcback/temporal-prometheus:release
+        init: true
+        depends_on:
+            - temporal-grafana
+        networks:
+            - default
+        expose:
+            - "9090"
+        volumes:
+            - vol_temporal_prometheus_data:/opt/prometheus/data/
+        deploy:
+            <<: *endpoint-mode-dnsrr
+            placement:
+                constraints:
+                    # Must run on the host with Temporal Prometheus data volume
+                    - node.labels.role-temporal-prometheus == true
+            # Worker count
+            replicas: 1
+            resources:
+                limits:
+                    cpus: "2"
+                    memory: "2G"
+
+    #
+    # Temporal server (running stateful workflows)
+    # --------------------------------------------
+    #
+    temporal-server:
+        image: gcr.io/mcback/temporal-server:release
+        init: true
+        networks:
+            - default
+        depends_on:
+            - temporal-postgresql
+            - temporal-elasticsearch
+            - temporal-prometheus
+        expose:
+            - "6933"
+            - "6934"
+            - "6935"
+            - "6939"
+            - "7233"
+            - "7234"
+            - "7235"
+            - "7239"
+        volumes:
+            - vol_temporal_server_archives:/var/lib/temporal/
+        deploy:
+            <<: *endpoint-mode-dnsrr
+            placement:
+                constraints:
+                    # Must run on the host with Temporal server data volume
+                    - node.labels.role-temporal-server == true
+            # Worker count
+            replicas: 1
+            resources:
+                limits:
+                    # CPU core limit
+                    cpus: "8"
+                    # RAM limit
+                    memory: "32G"
+
+    #
+    # Temporal webapp (tracking workflow state)
+    # -----------------------------------------
+    #
+    temporal-webapp:
+        image: gcr.io/mcback/temporal-webapp:release
+        init: true
+        networks:
+            - default
+        expose:
+            - "8088"
+        ports:
+            # For connecting to it through an SSH tunnel
+            # MAKE SURE to BLOCK THIS PORT in provision/roles/docker/tasks/iptables.yml
+            - "8088:8088"
+        deploy:
+            <<: *misc-apps_deploy_placement_constraints
+            <<: *endpoint-mode-dnsrr
+            # Worker count
+            replicas: 1
+            resources:
+                limits:
+                    # CPU core limit
+                    cpus: "2"
+                    # RAM limit
+                    memory: "4G"
+
     #
     # Extract story links for a topic
     # -------------------------------
@@ -2365,3 +2495,43 @@ volumes:
             type: none
             o: bind
             device: /space/mediacloud/vol_elk_elasticsearch_data
+
+    # Temporal server workflow archives
+    vol_temporal_server_archives:
+        driver: local
+        driver_opts:
+            type: none
+            o: bind
+            device: /space/mediacloud/vol_temporal_server_archives
+
+    # Temporal PostgreSQL server data (own host directory, named after the volume like its siblings)
+    vol_temporal_postgresql_data:
+        driver: local
+        driver_opts:
+            type: none
+            o: bind
+            device: /space/mediacloud/vol_temporal_postgresql_data
+
+    # Temporal Elasticsearch data
+    vol_temporal_elasticsearch_data:
+        driver: local
+        driver_opts:
+            type: none
+            o: bind
+            device: /space/mediacloud/vol_temporal_elasticsearch_data
+
+    # Temporal Prometheus data
+    vol_temporal_prometheus_data:
+        driver: local
+        driver_opts:
+            type: none
+            o: bind
+            device: /space/mediacloud/vol_temporal_prometheus_data
+
+    # Temporal Grafana data
+    vol_temporal_grafana_data:
+        driver: local
+        driver_opts:
+            type: none
+            o: bind
+            device: /space/mediacloud/vol_temporal_grafana_data
diff --git a/apps/dump-table/.idea/dump-table.iml b/apps/dump-table/.idea/dump-table.iml
index b01990d739..8bb1623091 100644
--- a/apps/dump-table/.idea/dump-table.iml
+++ b/apps/dump-table/.idea/dump-table.iml
@@ -2,7 +2,7 @@
 <module type="PYTHON_MODULE" version="4">
   <component name="NewModuleRootManager">
     <content url="file://$MODULE_DIR$" />
-    <orderEntry type="jdk" jdkName="Remote Python 3.7.3 Docker Compose (dump-table at [/Users/pypt/Dropbox/etc-MediaCloud/trunk/apps/dump-table/docker-compose.tests.yml])" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Remote Python 3.8.5 Docker Compose (dump-table at [/home/pypt/m/apps/dump-table/docker-compose.tests.yml])" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="TestRunnerService">
diff --git a/apps/dump-table/.idea/mediawords.sql b/apps/dump-table/.idea/mediawords.sql
new file mode 120000
index 0000000000..08fc9a64b9
--- /dev/null
+++ b/apps/dump-table/.idea/mediawords.sql
@@ -0,0 +1 @@
+../../postgresql-server/schema/mediawords.sql
\ No newline at end of file
diff --git a/apps/dump-table/.idea/misc.xml b/apps/dump-table/.idea/misc.xml
index a628023bfa..219c0a49e1 100644
--- a/apps/dump-table/.idea/misc.xml
+++ b/apps/dump-table/.idea/misc.xml
@@ -6,5 +6,5 @@
   <component name="NodePackageJsonFileManager">
     <packageJsonPaths />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.7.3 Docker Compose (dump-table at [/Users/pypt/Dropbox/etc-MediaCloud/trunk/apps/dump-table/docker-compose.tests.yml])" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.8.5 Docker Compose (dump-table at [/home/pypt/m/apps/dump-table/docker-compose.tests.yml])" project-jdk-type="Python SDK" />
 </project>
\ No newline at end of file
diff --git a/apps/dump-table/.idea/sqlDataSources.xml b/apps/dump-table/.idea/sqlDataSources.xml
new file mode 100644
index 0000000000..796ff8cf62
--- /dev/null
+++ b/apps/dump-table/.idea/sqlDataSources.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="SqlDataSourceStorage">
+    <option name="dataSources">
+      <list>
+        <State>
+          <option name="id" value="65aad2e3-8cd8-474b-924c-1765d2a6d41c" />
+          <option name="name" value="DDL data source" />
+          <option name="urls">
+            <array>
+              <option value="file://$PROJECT_DIR$/.idea/mediawords.sql" />
+            </array>
+          </option>
+        </State>
+      </list>
+    </option>
+  </component>
+</project>
\ No newline at end of file
diff --git a/apps/dump-table/docker-compose.tests.yml b/apps/dump-table/docker-compose.tests.yml
index 946442019d..7940757394 100644
--- a/apps/dump-table/docker-compose.tests.yml
+++ b/apps/dump-table/docker-compose.tests.yml
@@ -43,5 +43,5 @@ services:
               source: ./../postgresql-server/schema/
               target: /opt/mediacloud/schema/
             - type: bind
-              source: ./../postgresql-server/conf/
-              target: /etc/postgresql/11/main/
+              source: ./../postgresql-base/conf/
+              target: /etc/postgresql/13/main/
diff --git a/apps/podcast-fetch-episode/.dockerignore b/apps/elasticsearch-base/.dockerignore
similarity index 100%
rename from apps/podcast-fetch-episode/.dockerignore
rename to apps/elasticsearch-base/.dockerignore
diff --git a/apps/elasticsearch-base/Dockerfile b/apps/elasticsearch-base/Dockerfile
new file mode 100644
index 0000000000..07207b0d51
--- /dev/null
+++ b/apps/elasticsearch-base/Dockerfile
@@ -0,0 +1,69 @@
+#
+# Base image for Elasticsearch
+#
+
+FROM gcr.io/mcback/java-base:latest
+
+# Install Elasticsearch
+# (https://www.elastic.co/downloads/elasticsearch-no-jdk)
+ENV MC_ELASTICSEARCH_VERSION=7.10.2
+RUN \
+    mkdir -p /opt/elasticsearch/ && \
+    curl --fail --location --retry 3 --retry-delay 5 "https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-${MC_ELASTICSEARCH_VERSION}-no-jdk-linux-x86_64.tar.gz" | \
+        tar -zx -C /opt/elasticsearch/ --strip 1 && \
+    true
+
+# Add unprivileged user the service will run as
+RUN useradd -ms /bin/bash elasticsearch
+
+RUN \
+    #
+    # Data directory
+    mkdir -p /var/lib/elasticsearch/ && \
+    mkdir -p /var/lib/elasticsearch/jvm-heapdumps/ && \
+    mkdir -p /var/lib/elasticsearch/jvm-gc-logs/ && \
+    chown -R elasticsearch:elasticsearch /var/lib/elasticsearch/ && \
+    #
+    # JVM options directory
+    mkdir -p /opt/elasticsearch/config/jvm.options.d/ && \
+    chmod 775 /opt/elasticsearch/config/jvm.options.d/ && \
+    #
+    true
+
+COPY config/* /opt/elasticsearch/config/
+COPY bin/* /opt/elasticsearch/bin/
+
+# Create keystore and move it to data volume
+RUN \
+    rm -f /opt/elasticsearch/config/elasticsearch.keystore && \
+    rm -f /var/lib/elasticsearch/elasticsearch.keystore && \
+    /opt/elasticsearch/bin/elasticsearch-keystore create && \
+    mv /opt/elasticsearch/config/elasticsearch.keystore /var/lib/elasticsearch/ && \
+    ln -s /var/lib/elasticsearch/elasticsearch.keystore /opt/elasticsearch/config/elasticsearch.keystore && \
+    chown elasticsearch:elasticsearch /var/lib/elasticsearch/elasticsearch.keystore && \
+    #
+    # Keystore tool will want to write a "temporary" keystore:
+    #
+    #     ERROR: unable to create temporary keystore at
+    #     [/opt/elasticsearch/config/elasticsearch.keystore.tmp],
+    #     write permissions required for [/opt/elasticsearch/config]
+    #     or run [elasticsearch-keystore upgrade]
+    #
+    # Plus the S3 plugin insists on writing to other locations too.
+    #
+    chown -R elasticsearch:elasticsearch /opt/elasticsearch/config/ && \
+    #
+    true
+
+USER elasticsearch
+
+# Elasticsearch HTTP
+EXPOSE 9200
+
+# Elasticsearch TCP transport
+EXPOSE 9300
+
+# No "VOLUME /var/lib/elasticsearch" here because sub-images might want to
+# pre-init the volume with some data
+
+CMD ["/opt/elasticsearch/bin/elasticsearch.sh"]
diff --git a/apps/elasticsearch-base/bin/elasticsearch.sh b/apps/elasticsearch-base/bin/elasticsearch.sh
new file mode 100755
index 0000000000..4dcc391452
--- /dev/null
+++ b/apps/elasticsearch-base/bin/elasticsearch.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+set -e
+set -u
+
+# https://www.elastic.co/guide/en/elasticsearch/reference/current/max-number-of-threads.html
+if [ "$(ulimit -u)" != "unlimited" ] && [ $(ulimit -u) -lt 4096 ]; then
+    echo "Process limit (ulimit -u) is too low."
+    exit 1
+fi
+
+# https://www.elastic.co/guide/en/elasticsearch/reference/current/file-descriptors.html
+if [ "$(ulimit -n -S)" != "unlimited" ] && [ $(ulimit -n -S) -lt 65535 ]; then
+    echo "Soft open file limit (ulimit -n -S) is too low."
+    exit 1
+fi
+if [ "$(ulimit -n -H)" != "unlimited" ] && [ $(ulimit -n -H) -lt 65535 ]; then
+    echo "Hard open file limit (ulimit -n -H) is too low."
+    exit 1
+fi
+
+# "Set Xmx and Xms to no more than 50% of your physical RAM."
+MC_RAM_SIZE=$(/container_memory_limit.sh)
+MC_ELASTICSEARCH_MS=$((MC_RAM_SIZE / 10 * 4))
+MC_ELASTICSEARCH_MX="${MC_ELASTICSEARCH_MS}"
+
+export ES_JAVA_OPTS=""
+
+# Memory limits
+export ES_JAVA_OPTS="${ES_JAVA_OPTS} -Xms${MC_ELASTICSEARCH_MS}m"
+export ES_JAVA_OPTS="${ES_JAVA_OPTS} -Xmx${MC_ELASTICSEARCH_MX}m"
+
+# Run Elasticsearch
+exec /opt/elasticsearch/bin/elasticsearch
diff --git a/apps/elasticsearch-base/config/.dockerignore b/apps/elasticsearch-base/config/.dockerignore
new file mode 100644
index 0000000000..b3c0a37b66
--- /dev/null
+++ b/apps/elasticsearch-base/config/.dockerignore
@@ -0,0 +1 @@
+elasticsearch.keystore
diff --git a/apps/elasticsearch-base/config/.gitignore b/apps/elasticsearch-base/config/.gitignore
new file mode 100644
index 0000000000..3eb03f777e
--- /dev/null
+++ b/apps/elasticsearch-base/config/.gitignore
@@ -0,0 +1,3 @@
+# Might get created by a Docker container
+elasticsearch.keystore
+
diff --git a/apps/elk-elasticsearch/config/elasticsearch.yml b/apps/elasticsearch-base/config/elasticsearch-base.yml
similarity index 68%
rename from apps/elk-elasticsearch/config/elasticsearch.yml
rename to apps/elasticsearch-base/config/elasticsearch-base.yml
index 12fc1f5b1c..3e7ad2dfea 100644
--- a/apps/elk-elasticsearch/config/elasticsearch.yml
+++ b/apps/elasticsearch-base/config/elasticsearch-base.yml
@@ -1,5 +1,3 @@
-cluster.name: elk-elasticsearch
-node.name: elk-elasticsearch
 path.data: /var/lib/elasticsearch
 network.host: 0.0.0.0
 http.port: 9200
@@ -8,8 +6,3 @@ transport.port: 9300
 # Use single node discovery in order to disable production mode and avoid bootstrap checks
 # see https://www.elastic.co/guide/en/elasticsearch/reference/current/bootstrap-checks.html
 discovery.type: single-node
-
-# Define S3 client for log snapshots
-s3.client:
-  elk_logs:
-    protocol: https
diff --git a/apps/elk-elasticsearch/config/java.policy b/apps/elasticsearch-base/config/java.policy
similarity index 100%
rename from apps/elk-elasticsearch/config/java.policy
rename to apps/elasticsearch-base/config/java.policy
diff --git a/apps/elk-elasticsearch/config/jvm.options b/apps/elasticsearch-base/config/jvm.options
similarity index 95%
rename from apps/elk-elasticsearch/config/jvm.options
rename to apps/elasticsearch-base/config/jvm.options
index c15568722a..3590c3bb45 100644
--- a/apps/elk-elasticsearch/config/jvm.options
+++ b/apps/elasticsearch-base/config/jvm.options
@@ -18,7 +18,7 @@
 # has sufficient space
 -XX:HeapDumpPath=/var/lib/elasticsearch/jvm-heapdumps/
 
-# Update policy for S3 plugin to work
+# Update policy for plugins to work
 -Djava.security.policy=/opt/elasticsearch/config/java.policy
 
 # Log JVM errors to STDERR
diff --git a/apps/elk-elasticsearch/config/log4j2.properties b/apps/elasticsearch-base/config/log4j2.properties
similarity index 100%
rename from apps/elk-elasticsearch/config/log4j2.properties
rename to apps/elasticsearch-base/config/log4j2.properties
diff --git a/apps/elk-elasticsearch/Dockerfile b/apps/elk-elasticsearch/Dockerfile
index 4dd588dc44..754ccb43b6 100644
--- a/apps/elk-elasticsearch/Dockerfile
+++ b/apps/elk-elasticsearch/Dockerfile
@@ -2,77 +2,36 @@
 # Elasticsearch for ELK logging stack
 #
 
-FROM gcr.io/mcback/java-base:latest
+FROM gcr.io/mcback/elasticsearch-base:latest
 
-# Install Elasticsearch
-# (https://www.elastic.co/downloads/elasticsearch-no-jdk)
-ENV ELK_ELASTICSEARCH_VERSION=7.10.2
-RUN \
-    mkdir -p /opt/elasticsearch/ && \
-    curl --fail --location --retry 3 --retry-delay 5 "https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-${ELK_ELASTICSEARCH_VERSION}-no-jdk-linux-x86_64.tar.gz" | \
-        tar -zx -C /opt/elasticsearch/ --strip 1 && \
-    true
+USER root
 
 # Install Elasticsearch Amazon S3 plugin for ILS archival
 # (we use curl to be able to configure retries and such)
 RUN \
-    curl --fail --location --retry 3 --retry-delay 5 "https://artifacts.elastic.co/downloads/elasticsearch-plugins/repository-s3/repository-s3-${ELK_ELASTICSEARCH_VERSION}.zip" > \
+    curl --fail --location --retry 3 --retry-delay 5 "https://artifacts.elastic.co/downloads/elasticsearch-plugins/repository-s3/repository-s3-${MC_ELASTICSEARCH_VERSION}.zip" > \
         /var/tmp/es-s3-plugin.zip && \
     /opt/elasticsearch/bin/elasticsearch-plugin install --batch file:///var/tmp/es-s3-plugin.zip && \
     rm /var/tmp/es-s3-plugin.zip && \
     true
 
-# Add unprivileged user the service will run as
-RUN useradd -ms /bin/bash elk
-
-RUN \
-    #
-    # Data directory
-    mkdir -p /var/lib/elasticsearch/ && \
-    mkdir -p /var/lib/elasticsearch/jvm-heapdumps/ && \
-    mkdir -p /var/lib/elasticsearch/jvm-gc-logs/ && \
-    chown -R elk:elk /var/lib/elasticsearch/ && \
-    #
-    # JVM options directory
-    mkdir -p /opt/elasticsearch/config/jvm.options.d/ && \
-    chmod 775 /opt/elasticsearch/config/jvm.options.d/ && \
-    #
-    true
-
 COPY config/* /opt/elasticsearch/config/
 COPY bin/* /opt/elasticsearch/bin/
 
 # Create keystore and move it to data volume
 RUN \
-    rm -f /opt/elasticsearch/config/elasticsearch.keystore && \
-    rm -f /var/lib/elasticsearch/elasticsearch.keystore && \
-    /opt/elasticsearch/bin/elasticsearch-keystore create && \
-    mv /opt/elasticsearch/config/elasticsearch.keystore /var/lib/elasticsearch/ && \
-    ln -s /var/lib/elasticsearch/elasticsearch.keystore /opt/elasticsearch/config/elasticsearch.keystore && \
-    chown elk:elk /var/lib/elasticsearch/elasticsearch.keystore && \
-    #
-    # Keystore tool will want to write a "temporary" keystore:
-    #
-    #     ERROR: unable to create temporary keystore at
-    #     [/opt/elasticsearch/config/elasticsearch.keystore.tmp],
-    #     write permissions required for [/opt/elasticsearch/config]
-    #     or run [elasticsearch-keystore upgrade]
     #
-    # Plus the S3 plugin insists at writing to other locations too.
-    #
-    chown -R elk:elk /opt/elasticsearch/config/ && \
+    # Merge base and ELK configs into one
+    cat \
+        /opt/elasticsearch/config/elasticsearch-base.yml \
+        /opt/elasticsearch/config/elk-elasticsearch.yml \
+        > /opt/elasticsearch/config/elasticsearch.yml && \
     #
     true
 
-USER elk
-
-# Elasticsearch HTTP
-EXPOSE 9200
-
-# Elasticsearch TCP transport
-EXPOSE 9300
+USER elasticsearch
 
 # Elasticsearch data
 VOLUME /var/lib/elasticsearch
 
-CMD ["/opt/elasticsearch/bin/elasticsearch.sh"]
+CMD ["/opt/elasticsearch/bin/elk-elasticsearch.sh"]
diff --git a/apps/elk-elasticsearch/bin/elasticsearch.sh b/apps/elk-elasticsearch/bin/elk-elasticsearch.sh
similarity index 56%
rename from apps/elk-elasticsearch/bin/elasticsearch.sh
rename to apps/elk-elasticsearch/bin/elk-elasticsearch.sh
index 5681a63dc0..677a2dcd4d 100755
--- a/apps/elk-elasticsearch/bin/elasticsearch.sh
+++ b/apps/elk-elasticsearch/bin/elk-elasticsearch.sh
@@ -24,22 +24,6 @@ fi
 
 set -u
 
-# https://www.elastic.co/guide/en/elasticsearch/reference/current/max-number-of-threads.html
-if [ "$(ulimit -u)" != "unlimited" ] && [ $(ulimit -u) -lt 4096 ]; then
-    echo "Process limit (ulimit -u) is too low."
-    exit 1
-fi
-
-# https://www.elastic.co/guide/en/elasticsearch/reference/current/file-descriptors.html
-if [ "$(ulimit -n -S)" != "unlimited" ] && [ $(ulimit -n -S) -lt 65535 ]; then
-    echo "Soft open file limit (ulimit -n -S) is too low."
-    exit 1
-fi
-if [ "$(ulimit -n -H)" != "unlimited" ] && [ $(ulimit -n -H) -lt 65535 ]; then
-    echo "Hard open file limit (ulimit -n -H) is too low."
-    exit 1
-fi
-
 # Update AWS credentials in a keystore
 echo "Update AWS credentials in a keystore..."
 echo -n "${MC_ELK_ELASTICSEARCH_SNAPSHOT_S3_ACCESS_KEY_ID}" | \
@@ -55,16 +39,5 @@ if [ ! -f /var/lib/elasticsearch/s3-snapshots-setup ]; then
     touch /var/lib/elasticsearch/s3-snapshots-setup
 fi
 
-# "Set Xmx and Xms to no more than 50% of your physical RAM."
-MC_RAM_SIZE=$(/container_memory_limit.sh)
-MC_ELASTICSEARCH_MS=$((MC_RAM_SIZE / 10 * 4))
-MC_ELASTICSEARCH_MX="${MC_ELASTICSEARCH_MS}"
-
-export ES_JAVA_OPTS=""
-
-# Memory limits
-export ES_JAVA_OPTS="${ES_JAVA_OPTS} -Xms${MC_ELASTICSEARCH_MS}m"
-export ES_JAVA_OPTS="${ES_JAVA_OPTS} -Xmx${MC_ELASTICSEARCH_MX}m"
-
-# Run Elasticsearch
-exec /opt/elasticsearch/bin/elasticsearch
+# Run Elasticsearch wrapper script
+exec /opt/elasticsearch/bin/elasticsearch.sh
diff --git a/apps/elk-elasticsearch/config/elk-elasticsearch.yml b/apps/elk-elasticsearch/config/elk-elasticsearch.yml
new file mode 100644
index 0000000000..68c42c5625
--- /dev/null
+++ b/apps/elk-elasticsearch/config/elk-elasticsearch.yml
@@ -0,0 +1,7 @@
+cluster.name: elk-elasticsearch
+node.name: elk-elasticsearch
+
+# Define S3 client for log snapshots
+s3.client:
+  elk_logs:
+    protocol: https
diff --git a/apps/elk-kibana/docker-compose.tests.yml b/apps/elk-kibana/docker-compose.tests.yml
index 26eb31d2c5..d98bca2078 100644
--- a/apps/elk-kibana/docker-compose.tests.yml
+++ b/apps/elk-kibana/docker-compose.tests.yml
@@ -49,11 +49,12 @@ services:
             - "9300:9300"
         volumes:
             - type: bind
-              source: ./../elk-elasticsearch/bin/elasticsearch.sh
-              target: /opt/elasticsearch/bin/elasticsearch.sh
+              source: ./../elk-elasticsearch/bin/elk-elasticsearch.sh
+              target: /opt/elasticsearch/bin/elk-elasticsearch.sh
             - type: bind
-              source: ./../elk-elasticsearch/config/
-              target: /opt/elasticsearch/config/
+              source: ./../elasticsearch-base/bin/elasticsearch.sh
+              target: /opt/elasticsearch/bin/elasticsearch.sh
+            # Not mounting config as it gets concatenated into a single file
         # Limit CPUs and RAM for the process to not get too greedy
         deploy:
             resources:
diff --git a/apps/export-tables-to-backup-crawler/.idea/export-tables-to-backup-crawler.iml b/apps/export-tables-to-backup-crawler/.idea/export-tables-to-backup-crawler.iml
index a44aec3bcd..3d9fff1090 100644
--- a/apps/export-tables-to-backup-crawler/.idea/export-tables-to-backup-crawler.iml
+++ b/apps/export-tables-to-backup-crawler/.idea/export-tables-to-backup-crawler.iml
@@ -2,7 +2,7 @@
 <module type="PYTHON_MODULE" version="4">
   <component name="NewModuleRootManager">
     <content url="file://$MODULE_DIR$" />
-    <orderEntry type="jdk" jdkName="Remote Python 3.7.3 Docker Compose (export-tables-to-backup-crawler at [/Users/pypt/Dropbox/etc-MediaCloud/trunk/apps/export-tables-to-backup-crawler/docker-compose.tests.yml])" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Remote Python 3.8.5 Docker Compose (export-tables-to-backup-crawler at [/home/pypt/m/apps/export-tables-to-backup-crawler/docker-compose.tests.yml])" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="TestRunnerService">
diff --git a/apps/export-tables-to-backup-crawler/.idea/mediawords.sql b/apps/export-tables-to-backup-crawler/.idea/mediawords.sql
new file mode 120000
index 0000000000..08fc9a64b9
--- /dev/null
+++ b/apps/export-tables-to-backup-crawler/.idea/mediawords.sql
@@ -0,0 +1 @@
+../../postgresql-server/schema/mediawords.sql
\ No newline at end of file
diff --git a/apps/export-tables-to-backup-crawler/.idea/misc.xml b/apps/export-tables-to-backup-crawler/.idea/misc.xml
index 4c079ac3cc..612d8fa8cb 100644
--- a/apps/export-tables-to-backup-crawler/.idea/misc.xml
+++ b/apps/export-tables-to-backup-crawler/.idea/misc.xml
@@ -6,5 +6,5 @@
   <component name="NodePackageJsonFileManager">
     <packageJsonPaths />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.7.3 Docker Compose (export-tables-to-backup-crawler at [/Users/pypt/Dropbox/etc-MediaCloud/trunk/apps/export-tables-to-backup-crawler/docker-compose.tests.yml])" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.8.5 Docker Compose (export-tables-to-backup-crawler at [/home/pypt/m/apps/export-tables-to-backup-crawler/docker-compose.tests.yml])" project-jdk-type="Python SDK" />
 </project>
\ No newline at end of file
diff --git a/apps/export-tables-to-backup-crawler/.idea/sqlDataSources.xml b/apps/export-tables-to-backup-crawler/.idea/sqlDataSources.xml
new file mode 100644
index 0000000000..1206505a61
--- /dev/null
+++ b/apps/export-tables-to-backup-crawler/.idea/sqlDataSources.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="SqlDataSourceStorage">
+    <option name="dataSources">
+      <list>
+        <State>
+          <option name="id" value="378ac013-a05f-4b57-8ba4-f5c8713209e9" />
+          <option name="name" value="DDL data source" />
+          <option name="urls">
+            <array>
+              <option value="file://$PROJECT_DIR$/.idea/mediawords.sql" />
+            </array>
+          </option>
+        </State>
+      </list>
+    </option>
+  </component>
+</project>
\ No newline at end of file
diff --git a/apps/export-tables-to-backup-crawler/docker-compose.tests.yml b/apps/export-tables-to-backup-crawler/docker-compose.tests.yml
index fa6ca3e3dc..bec36c4c46 100644
--- a/apps/export-tables-to-backup-crawler/docker-compose.tests.yml
+++ b/apps/export-tables-to-backup-crawler/docker-compose.tests.yml
@@ -49,5 +49,5 @@ services:
               source: ./../postgresql-server/schema/
               target: /opt/mediacloud/schema/
             - type: bind
-              source: ./../postgresql-server/conf/
-              target: /etc/postgresql/11/main/
+              source: ./../postgresql-base/conf/
+              target: /etc/postgresql/13/main/
diff --git a/apps/extract-and-vector/.idea/extract-and-vector.iml b/apps/extract-and-vector/.idea/extract-and-vector.iml
index 6c34b252d7..374cee519e 100644
--- a/apps/extract-and-vector/.idea/extract-and-vector.iml
+++ b/apps/extract-and-vector/.idea/extract-and-vector.iml
@@ -2,7 +2,7 @@
 <module type="PYTHON_MODULE" version="4">
   <component name="NewModuleRootManager">
     <content url="file://$MODULE_DIR$" />
-    <orderEntry type="jdk" jdkName="Remote Python 3.7.3 Docker Compose (extract-and-vector at [/Users/pypt/Dropbox/etc-MediaCloud/trunk/apps/extract-and-vector/docker-compose.tests.yml])" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Remote Python 3.8.5 Docker Compose (extract-and-vector at [/home/pypt/m/apps/extract-and-vector/docker-compose.tests.yml])" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PackageRequirementsSettings">
diff --git a/apps/extract-and-vector/.idea/mediawords.sql b/apps/extract-and-vector/.idea/mediawords.sql
new file mode 120000
index 0000000000..08fc9a64b9
--- /dev/null
+++ b/apps/extract-and-vector/.idea/mediawords.sql
@@ -0,0 +1 @@
+../../postgresql-server/schema/mediawords.sql
\ No newline at end of file
diff --git a/apps/extract-and-vector/.idea/misc.xml b/apps/extract-and-vector/.idea/misc.xml
index 2b539d222a..c8744769fb 100644
--- a/apps/extract-and-vector/.idea/misc.xml
+++ b/apps/extract-and-vector/.idea/misc.xml
@@ -6,5 +6,5 @@
   <component name="NodePackageJsonFileManager">
     <packageJsonPaths />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.7.3 Docker Compose (extract-and-vector at [/Users/pypt/Dropbox/etc-MediaCloud/trunk/apps/extract-and-vector/docker-compose.tests.yml])" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.8.5 Docker Compose (extract-and-vector at [/home/pypt/m/apps/extract-and-vector/docker-compose.tests.yml])" project-jdk-type="Python SDK" />
 </project>
\ No newline at end of file
diff --git a/apps/extract-and-vector/.idea/sqlDataSources.xml b/apps/extract-and-vector/.idea/sqlDataSources.xml
new file mode 100644
index 0000000000..e137dc53a1
--- /dev/null
+++ b/apps/extract-and-vector/.idea/sqlDataSources.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="SqlDataSourceStorage">
+    <option name="dataSources">
+      <list>
+        <State>
+          <option name="id" value="75a893da-d95d-40e9-9d14-d101a5eee155" />
+          <option name="name" value="DDL data source" />
+          <option name="urls">
+            <array>
+              <option value="file://$PROJECT_DIR$/.idea/mediawords.sql" />
+            </array>
+          </option>
+        </State>
+      </list>
+    </option>
+  </component>
+</project>
\ No newline at end of file
diff --git a/apps/extract-and-vector/docker-compose.tests.yml b/apps/extract-and-vector/docker-compose.tests.yml
index b13ba25133..dc15a36094 100644
--- a/apps/extract-and-vector/docker-compose.tests.yml
+++ b/apps/extract-and-vector/docker-compose.tests.yml
@@ -69,8 +69,8 @@ services:
               source: ./../postgresql-server/schema/
               target: /opt/mediacloud/schema/
             - type: bind
-              source: ./../postgresql-server/conf/
-              target: /etc/postgresql/11/main/
+              source: ./../postgresql-base/conf/
+              target: /etc/postgresql/13/main/
 
     rabbitmq-server:
         image: gcr.io/mcback/rabbitmq-server:latest
diff --git a/apps/extract-article-from-page/bin/extract_article_from_page_http_server.py b/apps/extract-article-from-page/bin/extract_article_from_page_http_server.py
index 21fd9c1078..4b99f4cce9 100755
--- a/apps/extract-article-from-page/bin/extract_article_from_page_http_server.py
+++ b/apps/extract-article-from-page/bin/extract_article_from_page_http_server.py
@@ -44,6 +44,9 @@
 
 
 class ServerHandler(BaseHTTPRequestHandler):
+    # Speak HTTP/1.1 so that clients sending "Expect: 100-continue" get a reply instead of stalling
+    protocol_version = "HTTP/1.1"
+
     _API_ENDPOINT_PATH = "/extract"
 
     def __json_response(self, status: int, response: dict) -> bytes:
diff --git a/apps/facebook-fetch-story-stats/.idea/facebook-fetch-story-stats.iml b/apps/facebook-fetch-story-stats/.idea/facebook-fetch-story-stats.iml
index 7329c1b21b..1a562335a1 100644
--- a/apps/facebook-fetch-story-stats/.idea/facebook-fetch-story-stats.iml
+++ b/apps/facebook-fetch-story-stats/.idea/facebook-fetch-story-stats.iml
@@ -2,7 +2,7 @@
 <module type="PYTHON_MODULE" version="4">
   <component name="NewModuleRootManager">
     <content url="file://$MODULE_DIR$" />
-    <orderEntry type="jdk" jdkName="Remote Python 3.7.5 Docker Compose (facebook-fetch-story-stats at [/Users/pypt/Dropbox/etc-MediaCloud/trunk/apps/facebook-fetch-story-stats/docker-compose.tests.yml])" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Remote Python 3.8.5 Docker Compose (facebook-fetch-story-stats at [/home/pypt/m/apps/facebook-fetch-story-stats/docker-compose.tests.yml])" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="TestRunnerService">
diff --git a/apps/facebook-fetch-story-stats/.idea/mediawords.sql b/apps/facebook-fetch-story-stats/.idea/mediawords.sql
new file mode 120000
index 0000000000..08fc9a64b9
--- /dev/null
+++ b/apps/facebook-fetch-story-stats/.idea/mediawords.sql
@@ -0,0 +1 @@
+../../postgresql-server/schema/mediawords.sql
\ No newline at end of file
diff --git a/apps/facebook-fetch-story-stats/.idea/misc.xml b/apps/facebook-fetch-story-stats/.idea/misc.xml
index a5e76612f4..501124f803 100644
--- a/apps/facebook-fetch-story-stats/.idea/misc.xml
+++ b/apps/facebook-fetch-story-stats/.idea/misc.xml
@@ -6,5 +6,5 @@
   <component name="NodePackageJsonFileManager">
     <packageJsonPaths />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.7.5 Docker Compose (facebook-fetch-story-stats at [/Users/pypt/Dropbox/etc-MediaCloud/trunk/apps/facebook-fetch-story-stats/docker-compose.tests.yml])" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.8.5 Docker Compose (facebook-fetch-story-stats at [/home/pypt/m/apps/facebook-fetch-story-stats/docker-compose.tests.yml])" project-jdk-type="Python SDK" />
 </project>
\ No newline at end of file
diff --git a/apps/facebook-fetch-story-stats/.idea/sqlDataSources.xml b/apps/facebook-fetch-story-stats/.idea/sqlDataSources.xml
new file mode 100644
index 0000000000..5a0f1f0d7a
--- /dev/null
+++ b/apps/facebook-fetch-story-stats/.idea/sqlDataSources.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="SqlDataSourceStorage">
+    <option name="dataSources">
+      <list>
+        <State>
+          <option name="id" value="6192f4b6-e79b-44de-852f-752bd6ebc726" />
+          <option name="name" value="DDL data source" />
+          <option name="urls">
+            <array>
+              <option value="file://$PROJECT_DIR$/.idea/mediawords.sql" />
+            </array>
+          </option>
+        </State>
+      </list>
+    </option>
+  </component>
+</project>
\ No newline at end of file
diff --git a/apps/facebook-fetch-story-stats/docker-compose.tests.yml b/apps/facebook-fetch-story-stats/docker-compose.tests.yml
index 52fd7b1b3d..917c869c08 100644
--- a/apps/facebook-fetch-story-stats/docker-compose.tests.yml
+++ b/apps/facebook-fetch-story-stats/docker-compose.tests.yml
@@ -53,8 +53,8 @@ services:
               source: ./../postgresql-server/schema/
               target: /opt/mediacloud/schema/
             - type: bind
-              source: ./../postgresql-server/conf/
-              target: /etc/postgresql/11/main/
+              source: ./../postgresql-base/conf/
+              target: /etc/postgresql/13/main/
 
     rabbitmq-server:
         image: gcr.io/mcback/rabbitmq-server:latest
diff --git a/apps/import-solr-data-for-testing/.idea/import-solr-data-for-testing.iml b/apps/import-solr-data-for-testing/.idea/import-solr-data-for-testing.iml
index 418a15a742..4ea74d3211 100644
--- a/apps/import-solr-data-for-testing/.idea/import-solr-data-for-testing.iml
+++ b/apps/import-solr-data-for-testing/.idea/import-solr-data-for-testing.iml
@@ -2,7 +2,7 @@
 <module type="PYTHON_MODULE" version="4">
   <component name="NewModuleRootManager">
     <content url="file://$MODULE_DIR$" />
-    <orderEntry type="jdk" jdkName="Remote Python 3.7.7 Docker (gcr.io/mcback/import-solr-data-for-testing:latest)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Remote Python 3.8.5 Docker Compose (import-solr-data-for-testing at [/home/pypt/m/apps/import-solr-data-for-testing/docker-compose.tests.yml])" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PackageRequirementsSettings">
diff --git a/apps/import-solr-data-for-testing/.idea/mediawords.sql b/apps/import-solr-data-for-testing/.idea/mediawords.sql
new file mode 120000
index 0000000000..08fc9a64b9
--- /dev/null
+++ b/apps/import-solr-data-for-testing/.idea/mediawords.sql
@@ -0,0 +1 @@
+../../postgresql-server/schema/mediawords.sql
\ No newline at end of file
diff --git a/apps/import-solr-data-for-testing/.idea/misc.xml b/apps/import-solr-data-for-testing/.idea/misc.xml
index 140198e1e6..6ad961cfaf 100644
--- a/apps/import-solr-data-for-testing/.idea/misc.xml
+++ b/apps/import-solr-data-for-testing/.idea/misc.xml
@@ -6,5 +6,5 @@
   <component name="NodePackageJsonFileManager">
     <packageJsonPaths />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.7.7 Docker (gcr.io/mcback/import-solr-data-for-testing:latest)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.8.5 Docker Compose (import-solr-data-for-testing at [/home/pypt/m/apps/import-solr-data-for-testing/docker-compose.tests.yml])" project-jdk-type="Python SDK" />
 </project>
\ No newline at end of file
diff --git a/apps/import-solr-data-for-testing/.idea/sqlDataSources.xml b/apps/import-solr-data-for-testing/.idea/sqlDataSources.xml
new file mode 100644
index 0000000000..a1a49e3292
--- /dev/null
+++ b/apps/import-solr-data-for-testing/.idea/sqlDataSources.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="SqlDataSourceStorage">
+    <option name="dataSources">
+      <list>
+        <State>
+          <option name="id" value="ae978ad1-d097-4d36-bfc6-94a454c6f78f" />
+          <option name="name" value="DDL data source" />
+          <option name="urls">
+            <array>
+              <option value="file://$PROJECT_DIR$/.idea/mediawords.sql" />
+            </array>
+          </option>
+        </State>
+      </list>
+    </option>
+  </component>
+</project>
\ No newline at end of file
diff --git a/apps/import-solr-data-for-testing/docker-compose.tests.yml b/apps/import-solr-data-for-testing/docker-compose.tests.yml
index 679bb5b161..27fc041b8c 100644
--- a/apps/import-solr-data-for-testing/docker-compose.tests.yml
+++ b/apps/import-solr-data-for-testing/docker-compose.tests.yml
@@ -50,8 +50,8 @@ services:
               source: ./../postgresql-server/schema/
               target: /opt/mediacloud/schema/
             - type: bind
-              source: ./../postgresql-server/conf/
-              target: /etc/postgresql/11/main/
+              source: ./../postgresql-base/conf/
+              target: /etc/postgresql/13/main/
 
     solr-shard-01:
         image: gcr.io/mcback/solr-shard:latest
diff --git a/apps/import-solr-data/.idea/import-solr-data.iml b/apps/import-solr-data/.idea/import-solr-data.iml
index a87b798c24..635c58f05c 100644
--- a/apps/import-solr-data/.idea/import-solr-data.iml
+++ b/apps/import-solr-data/.idea/import-solr-data.iml
@@ -9,7 +9,7 @@
       <sourceFolder url="file://$MODULE_DIR$/src/python" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/bin" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Remote Python 3.7.7 Docker Compose (import-solr-data at [/home/pypt/Dropbox/etc-MediaCloud/trunk/apps/import-solr-data/docker-compose.tests.yml])" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Remote Python 3.8.5 Docker Compose (import-solr-data at [/home/pypt/m/apps/import-solr-data/docker-compose.tests.yml])" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="TestRunnerService">
diff --git a/apps/import-solr-data/.idea/mediawords.sql b/apps/import-solr-data/.idea/mediawords.sql
new file mode 120000
index 0000000000..08fc9a64b9
--- /dev/null
+++ b/apps/import-solr-data/.idea/mediawords.sql
@@ -0,0 +1 @@
+../../postgresql-server/schema/mediawords.sql
\ No newline at end of file
diff --git a/apps/import-solr-data/.idea/misc.xml b/apps/import-solr-data/.idea/misc.xml
index 14bbb3ee03..4454819bf6 100644
--- a/apps/import-solr-data/.idea/misc.xml
+++ b/apps/import-solr-data/.idea/misc.xml
@@ -6,5 +6,5 @@
   <component name="NodePackageJsonFileManager">
     <packageJsonPaths />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.7.7 Docker Compose (import-solr-data at [/home/pypt/Dropbox/etc-MediaCloud/trunk/apps/import-solr-data/docker-compose.tests.yml])" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.8.5 Docker Compose (import-solr-data at [/home/pypt/m/apps/import-solr-data/docker-compose.tests.yml])" project-jdk-type="Python SDK" />
 </project>
\ No newline at end of file
diff --git a/apps/import-solr-data/.idea/sqlDataSources.xml b/apps/import-solr-data/.idea/sqlDataSources.xml
new file mode 100644
index 0000000000..0f7b4d038e
--- /dev/null
+++ b/apps/import-solr-data/.idea/sqlDataSources.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="SqlDataSourceStorage">
+    <option name="dataSources">
+      <list>
+        <State>
+          <option name="id" value="1ee9d429-a5f9-4e02-98d5-84492de1422f" />
+          <option name="name" value="DDL data source" />
+          <option name="urls">
+            <array>
+              <option value="file://$PROJECT_DIR$/.idea/mediawords.sql" />
+            </array>
+          </option>
+        </State>
+      </list>
+    </option>
+  </component>
+</project>
\ No newline at end of file
diff --git a/apps/import-solr-data/docker-compose.tests.yml b/apps/import-solr-data/docker-compose.tests.yml
index bb70f29a81..ebf9bfbd6d 100644
--- a/apps/import-solr-data/docker-compose.tests.yml
+++ b/apps/import-solr-data/docker-compose.tests.yml
@@ -75,8 +75,8 @@ services:
               source: ./../postgresql-server/schema/
               target: /opt/mediacloud/schema/
             - type: bind
-              source: ./../postgresql-server/conf/
-              target: /etc/postgresql/11/main/
+              source: ./../postgresql-base/conf/
+              target: /etc/postgresql/13/main/
 
     solr-shard-01:
         image: gcr.io/mcback/solr-shard:latest
diff --git a/apps/import-stories-feedly/.idea/import-stories-feedly.iml b/apps/import-stories-feedly/.idea/import-stories-feedly.iml
index e3b6952c3c..61a389860d 100644
--- a/apps/import-stories-feedly/.idea/import-stories-feedly.iml
+++ b/apps/import-stories-feedly/.idea/import-stories-feedly.iml
@@ -2,7 +2,7 @@
 <module type="PYTHON_MODULE" version="4">
   <component name="NewModuleRootManager">
     <content url="file://$MODULE_DIR$" />
-    <orderEntry type="jdk" jdkName="Remote Python 3.7.3 Docker Compose (import-stories-feedly at [/Users/pypt/Dropbox/etc-MediaCloud/trunk/apps/import-stories-feedly/docker-compose.tests.yml])" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Remote Python 3.8.5 Docker Compose (import-stories-feedly at [/home/pypt/m/apps/import-stories-feedly/docker-compose.tests.yml])" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="TestRunnerService">
diff --git a/apps/import-stories-feedly/.idea/mediawords.sql b/apps/import-stories-feedly/.idea/mediawords.sql
new file mode 120000
index 0000000000..08fc9a64b9
--- /dev/null
+++ b/apps/import-stories-feedly/.idea/mediawords.sql
@@ -0,0 +1 @@
+../../postgresql-server/schema/mediawords.sql
\ No newline at end of file
diff --git a/apps/import-stories-feedly/.idea/misc.xml b/apps/import-stories-feedly/.idea/misc.xml
index 884ce73432..62346b8b3c 100644
--- a/apps/import-stories-feedly/.idea/misc.xml
+++ b/apps/import-stories-feedly/.idea/misc.xml
@@ -6,5 +6,5 @@
   <component name="NodePackageJsonFileManager">
     <packageJsonPaths />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.7.3 Docker Compose (import-stories-feedly at [/Users/pypt/Dropbox/etc-MediaCloud/trunk/apps/import-stories-feedly/docker-compose.tests.yml])" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.8.5 Docker Compose (import-stories-feedly at [/home/pypt/m/apps/import-stories-feedly/docker-compose.tests.yml])" project-jdk-type="Python SDK" />
 </project>
\ No newline at end of file
diff --git a/apps/import-stories-feedly/.idea/sqlDataSources.xml b/apps/import-stories-feedly/.idea/sqlDataSources.xml
new file mode 100644
index 0000000000..56932b0cf7
--- /dev/null
+++ b/apps/import-stories-feedly/.idea/sqlDataSources.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="SqlDataSourceStorage">
+    <option name="dataSources">
+      <list>
+        <State>
+          <option name="id" value="06cfc8f4-1088-484b-a299-c2539de21650" />
+          <option name="name" value="DDL data source" />
+          <option name="urls">
+            <array>
+              <option value="file://$PROJECT_DIR$/.idea/mediawords.sql" />
+            </array>
+          </option>
+        </State>
+      </list>
+    </option>
+  </component>
+</project>
\ No newline at end of file
diff --git a/apps/import-stories-feedly/docker-compose.tests.yml b/apps/import-stories-feedly/docker-compose.tests.yml
index a0b0c338b0..dfc441ac98 100644
--- a/apps/import-stories-feedly/docker-compose.tests.yml
+++ b/apps/import-stories-feedly/docker-compose.tests.yml
@@ -52,5 +52,5 @@ services:
               source: ./../postgresql-server/schema/
               target: /opt/mediacloud/schema/
             - type: bind
-              source: ./../postgresql-server/conf/
-              target: /etc/postgresql/11/main/
+              source: ./../postgresql-base/conf/
+              target: /etc/postgresql/13/main/
diff --git a/apps/import-stories-scrapehtml/.idea/import-stories-scrapehtml.iml b/apps/import-stories-scrapehtml/.idea/import-stories-scrapehtml.iml
index c0e0968c5c..9d92328955 100644
--- a/apps/import-stories-scrapehtml/.idea/import-stories-scrapehtml.iml
+++ b/apps/import-stories-scrapehtml/.idea/import-stories-scrapehtml.iml
@@ -2,7 +2,7 @@
 <module type="PYTHON_MODULE" version="4">
   <component name="NewModuleRootManager">
     <content url="file://$MODULE_DIR$" />
-    <orderEntry type="jdk" jdkName="Remote Python 3.7.3 Docker Compose (import-stories-scrapehtml at [/Users/pypt/Dropbox/etc-MediaCloud/trunk/apps/import-stories-scrapehtml/docker-compose.tests.yml])" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Remote Python 3.8.5 Docker Compose (import-stories-scrapehtml at [/home/pypt/m/apps/import-stories-scrapehtml/docker-compose.tests.yml])" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="TestRunnerService">
diff --git a/apps/import-stories-scrapehtml/.idea/mediawords.sql b/apps/import-stories-scrapehtml/.idea/mediawords.sql
new file mode 120000
index 0000000000..08fc9a64b9
--- /dev/null
+++ b/apps/import-stories-scrapehtml/.idea/mediawords.sql
@@ -0,0 +1 @@
+../../postgresql-server/schema/mediawords.sql
\ No newline at end of file
diff --git a/apps/import-stories-scrapehtml/.idea/misc.xml b/apps/import-stories-scrapehtml/.idea/misc.xml
index cd8728a737..9c2430e515 100644
--- a/apps/import-stories-scrapehtml/.idea/misc.xml
+++ b/apps/import-stories-scrapehtml/.idea/misc.xml
@@ -6,5 +6,5 @@
   <component name="NodePackageJsonFileManager">
     <packageJsonPaths />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.7.3 Docker Compose (import-stories-scrapehtml at [/Users/pypt/Dropbox/etc-MediaCloud/trunk/apps/import-stories-scrapehtml/docker-compose.tests.yml])" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.8.5 Docker Compose (import-stories-scrapehtml at [/home/pypt/m/apps/import-stories-scrapehtml/docker-compose.tests.yml])" project-jdk-type="Python SDK" />
 </project>
\ No newline at end of file
diff --git a/apps/import-stories-scrapehtml/.idea/sqlDataSources.xml b/apps/import-stories-scrapehtml/.idea/sqlDataSources.xml
new file mode 100644
index 0000000000..a29f210cd2
--- /dev/null
+++ b/apps/import-stories-scrapehtml/.idea/sqlDataSources.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="SqlDataSourceStorage">
+    <option name="dataSources">
+      <list>
+        <State>
+          <option name="id" value="4b8a10b3-df9a-412d-90b1-6a0cd594c547" />
+          <option name="name" value="DDL data source" />
+          <option name="urls">
+            <array>
+              <option value="file://$PROJECT_DIR$/.idea/mediawords.sql" />
+            </array>
+          </option>
+        </State>
+      </list>
+    </option>
+  </component>
+</project>
\ No newline at end of file
diff --git a/apps/import-stories-scrapehtml/docker-compose.tests.yml b/apps/import-stories-scrapehtml/docker-compose.tests.yml
index 7feb9e03b1..f48a47cda7 100644
--- a/apps/import-stories-scrapehtml/docker-compose.tests.yml
+++ b/apps/import-stories-scrapehtml/docker-compose.tests.yml
@@ -52,5 +52,5 @@ services:
               source: ./../postgresql-server/schema/
               target: /opt/mediacloud/schema/
             - type: bind
-              source: ./../postgresql-server/conf/
-              target: /etc/postgresql/11/main/
+              source: ./../postgresql-base/conf/
+              target: /etc/postgresql/13/main/
diff --git a/apps/mail-postfix-server/Dockerfile b/apps/mail-postfix-server/Dockerfile
index 6c475f995c..039bf1b029 100644
--- a/apps/mail-postfix-server/Dockerfile
+++ b/apps/mail-postfix-server/Dockerfile
@@ -50,13 +50,16 @@ RUN \
     #
     # Filter out "Received:" and some other headers
     postconf -e header_checks=regexp:/etc/postfix/header_checks && \
-    postconf -e mime_header_checks=regexp:/etc/postfix/header_checks && \
     postconf -e smtp_header_checks=regexp:/etc/postfix/header_checks && \
     #
     # Don't require TLS as local clients are trusted
     postconf -e smtp_tls_security_level=may && \
     postconf -e smtpd_tls_security_level=none && \
     #
+    # Make sure default headers (e.g. Message-Id, date) are present
+    postconf -e always_add_missing_headers=yes && \
+    postconf -e local_header_rewrite_clients=permit_inet_interfaces && \
+    #
     # Disable chroot on all services as it doesn't play well with a mounted
     # volume, e.g. "smtpd" is unable to access libnss after a chroot and thus
     # resolve OpenDKIM container.
diff --git a/apps/mail-postfix-server/docker-compose.tests.yml b/apps/mail-postfix-server/docker-compose.tests.yml
index babdaa376d..1c51ca0677 100644
--- a/apps/mail-postfix-server/docker-compose.tests.yml
+++ b/apps/mail-postfix-server/docker-compose.tests.yml
@@ -4,15 +4,36 @@ services:
 
     # Service to use for testing the mail service
     #
-    # Usage:
+    # Steps to test:
     #
-    #     host$ ./dev/run.py mail-postfix-server bash
-    #     container$ sendmail "your@email.com"
+    #     1) host$ ./dev/run.py mail-postfix-server bash
+    #     2) (new terminal window) host$ docker ps
+    #     3) find container with name ending in 'mail-postfix-server-actual_1'
+    #     4) host$ docker exec -it some_string_mail-postfix-server-actual_1 bash
+    #     5) container$ ./postfix.sh
+    #     6) open new terminal window on your host machine
+    #     7) host$ docker exec -it some_string_mail-postfix-server-actual_1 bash
+    #     8) follow instructions at URL below to create a test mail.txt file and send to your email address from the container
+    #     https://clients.javapipe.com/knowledgebase/132/How-to-Test-Sendmail-From-Command-Line-on-Linux.html
     #
+    #     Alternatively, if you want to test via the send_email() method (https://github.com/mediacloud/backend/blob/master/apps/common/src/python/mediawords/util/mail.py#L73),
+    #     or test changes to said method, you can disregard steps 7-8 above and instead:
+    #     7) host$ docker ps
+    #     8) Find mail-postfix-server container ID
+    #     9) host$ docker exec -it some_string_mail-postfix-server bash
+    #     10) container$ python3
+    #     11) >> from mediawords.util.mail import *
+    #     12) >> test_message = Message(to='your@email.com', subject='test postfix', text_body=None, html_body='<p>hi</p>')
+    #     13) >> send_email(test_message)
+    #    
     mail-postfix-server:
         image: gcr.io/mcback/common:latest
         init: true
         stop_signal: SIGKILL
+        volumes:
+            - type: bind
+              source: ./../common/src/
+              target: /opt/mediacloud/src/common/
         depends_on:
             - mail-postfix-server-actual
 
@@ -21,7 +42,7 @@ services:
         image: gcr.io/mcback/mail-postfix-server:latest
         init: true
         stop_signal: SIGKILL
-        # "docker exec" into a container and run Postfix manually (/postfix.sh):
+        # "docker exec" into a container and run Postfix manually (./postfix.sh):
         command: sleep infinity
         # To be able to set /proc/sys/kernel/yama/ptrace_scope:
         privileged: true
diff --git a/apps/mail-postfix-server/header_checks b/apps/mail-postfix-server/header_checks
index 0b5347f5fe..d23d0795d6 100644
--- a/apps/mail-postfix-server/header_checks
+++ b/apps/mail-postfix-server/header_checks
@@ -1,5 +1,4 @@
 /^Received:.*with ESMTP /         IGNORE
 /^X-Originating-IP:/    IGNORE
 /^X-Mailer:/            IGNORE
-/^Mime-Version:/        IGNORE
 /^User-Agent:/          IGNORE
\ No newline at end of file
diff --git a/apps/munin-cron/Dockerfile b/apps/munin-cron/Dockerfile
index 27e617e49f..ab512c923f 100644
--- a/apps/munin-cron/Dockerfile
+++ b/apps/munin-cron/Dockerfile
@@ -4,6 +4,9 @@
 
 FROM gcr.io/mcback/cron-base:latest
 
+# FIXME: refreshes APT package lists in a separate step; why this is needed is not documented here
+RUN apt-get -y update
+
 # Install packages
 RUN \
     #
@@ -43,6 +46,8 @@ COPY munin-conf.d/ /etc/munin/munin-conf.d/
 # Overwrite crontab with our own
 COPY crontab /etc/cron.d/munin
 
+COPY bin/munin-cron.sh /
+
 # Volume for RRD data (shared with munin-fastcgi-graph)
 VOLUME /var/lib/munin/
 
@@ -50,3 +55,6 @@ VOLUME /var/lib/munin/
 VOLUME /var/cache/munin/www/
 
 # No USER because Cron will run the script as unprivileged user itself
+
+# Use our own wrapper for Cron (writes Munin's alerts.conf, then starts /cron.sh)
+CMD ["/munin-cron.sh"]
diff --git a/apps/munin-cron/bin/munin-cron.sh b/apps/munin-cron/bin/munin-cron.sh
new file mode 100755
index 0000000000..a44d39d8a4
--- /dev/null
+++ b/apps/munin-cron/bin/munin-cron.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+set -e
+
+if [ -z "$MC_MUNIN_CRON_ALERT_EMAIL" ]; then
+    echo "MC_MUNIN_CRON_ALERT_EMAIL (email address to send email alerts to) is not set."
+    exit 1
+fi
+
+set -u
+
+# Set up alerting
+ALERTS_CONF_FILE="/etc/munin/munin-conf.d/alerts.conf"
+echo -n > "${ALERTS_CONF_FILE}"
+chmod 644 "${ALERTS_CONF_FILE}"
+
+# Pretty weird way to print a bunch of dollar signs to a file but Munin doesn't make it easy
+echo -n 'contact.mediacloud.command ' >> "${ALERTS_CONF_FILE}"
+echo -n 'mail -s "[Munin] ' >> "${ALERTS_CONF_FILE}"
+echo -n '${if:cfields CRITICAL}${if:wfields WARNING}' >> "${ALERTS_CONF_FILE}"
+echo -n '${if:fofields OK}${if:ufields UNKNOWN}' >> "${ALERTS_CONF_FILE}"
+echo -n ' -> ${var:graph_title} ' >> "${ALERTS_CONF_FILE}"
+echo -n '${if:wfields -> ${loop<,>:wfields ${var:label}=${var:value}}}' >> "${ALERTS_CONF_FILE}"
+echo -n '${if:cfields -> ${loop<,>:cfields ${var:label}=${var:value}}}' >> "${ALERTS_CONF_FILE}"
+echo -n '${if:fofields -> ${loop<,>:fofields ${var:label}=${var:value}}}' >> "${ALERTS_CONF_FILE}"
+echo -n '" ' >> "${ALERTS_CONF_FILE}"
+
+# Escape "@" in the alert email address as "\@" before appending it to the contact command
+echo -n "${MC_MUNIN_CRON_ALERT_EMAIL}" | sed 's/@/\\@/g' >> "${ALERTS_CONF_FILE}"
+
+echo >> "${ALERTS_CONF_FILE}"
+
+# Start Cron daemon wrapper from cron-base
+exec /cron.sh
diff --git a/apps/munin-cron/docker-compose.tests.yml b/apps/munin-cron/docker-compose.tests.yml
index e24e465693..055e88673f 100644
--- a/apps/munin-cron/docker-compose.tests.yml
+++ b/apps/munin-cron/docker-compose.tests.yml
@@ -6,10 +6,15 @@ services:
         image: gcr.io/mcback/munin-cron:latest
         init: true
         stop_signal: SIGKILL
+        environment:
+            MC_MUNIN_CRON_ALERT_EMAIL: "alerts@testmediacloud.ml"
         volumes:
             - type: bind
-              source: ./munin-conf.d/
-              target: /etc/munin/munin-conf.d/
+              source: ./munin-conf.d/host.conf
+              target: /etc/munin/munin-conf.d/host.conf
+            - type: bind
+              source: ./bin/munin-cron.sh
+              target: /munin-cron.sh
             - type: bind
               source: ./../cron-base/bin/cron.sh
               target: /cron.sh
@@ -58,8 +63,8 @@ services:
               source: ./../postgresql-server/schema/
               target: /opt/mediacloud/schema/
             - type: bind
-              source: ./../postgresql-server/conf/
-              target: /etc/postgresql/11/main/
+              source: ./../postgresql-base/conf/
+              target: /etc/postgresql/13/main/
 
     solr-shard-01:
         image: gcr.io/mcback/solr-shard:latest
diff --git a/apps/munin-cron/munin-conf.d/.gitignore b/apps/munin-cron/munin-conf.d/.gitignore
new file mode 100644
index 0000000000..7842d2c628
--- /dev/null
+++ b/apps/munin-cron/munin-conf.d/.gitignore
@@ -0,0 +1,3 @@
+# Gets autogenerated on every start
+alerts.conf
+
diff --git a/apps/munin-cron/munin-conf.d/alerts.conf b/apps/munin-cron/munin-conf.d/alerts.conf
deleted file mode 100644
index 583489fc94..0000000000
--- a/apps/munin-cron/munin-conf.d/alerts.conf
+++ /dev/null
@@ -1,2 +0,0 @@
-contact.hroberts.command mail -s "[Munin] ${if:cfields CRITICAL}${if:wfields WARNING}${if:fofields OK}${if:ufields UNKNOWN} -> ${var:graph_title} ${if:wfields -> ${loop<,>:wfields ${var:label}=${var:value}}}${if:cfields -> ${loop<,>:cfields ${var:label}=${var:value}}}${if:fofields -> ${loop<,>:fofields ${var:label}=${var:value}}}" hroberts\@mediacloud.org
-contact.lvaliukas.command mail -s "[Munin] ${if:cfields CRITICAL}${if:wfields WARNING}${if:fofields OK}${if:ufields UNKNOWN} -> ${var:graph_title} ${if:wfields -> ${loop<,>:wfields ${var:label}=${var:value}}}${if:cfields -> ${loop<,>:cfields ${var:label}=${var:value}}}${if:fofields -> ${loop<,>:fofields ${var:label}=${var:value}}}" linas\@mediacloud.org
diff --git a/apps/munin-httpd/docker-compose.tests.yml b/apps/munin-httpd/docker-compose.tests.yml
index 17c380dfb8..0c8403be69 100644
--- a/apps/munin-httpd/docker-compose.tests.yml
+++ b/apps/munin-httpd/docker-compose.tests.yml
@@ -36,6 +36,8 @@ services:
         image: gcr.io/mcback/munin-cron:latest
         init: true
         stop_signal: SIGKILL
+        environment:
+            MC_MUNIN_CRON_ALERT_EMAIL: "FIXME@mediacloud.org"
         volumes:
             - type: bind
               source: ./../munin-cron/munin-conf.d/
@@ -88,8 +90,8 @@ services:
               source: ./../postgresql-server/schema/
               target: /opt/mediacloud/schema/
             - type: bind
-              source: ./../postgresql-server/conf/
-              target: /etc/postgresql/11/main/
+              source: ./../postgresql-base/conf/
+              target: /etc/postgresql/13/main/
 
     solr-shard-01:
         image: gcr.io/mcback/solr-shard:latest
diff --git a/apps/munin-node/docker-compose.tests.yml b/apps/munin-node/docker-compose.tests.yml
index 9a99b0065b..d9bdc527e2 100644
--- a/apps/munin-node/docker-compose.tests.yml
+++ b/apps/munin-node/docker-compose.tests.yml
@@ -43,8 +43,8 @@ services:
               source: ./../postgresql-server/schema/
               target: /opt/mediacloud/schema/
             - type: bind
-              source: ./../postgresql-server/conf/
-              target: /etc/postgresql/11/main/
+              source: ./../postgresql-base/conf/
+              target: /etc/postgresql/13/main/
 
     solr-shard-01:
         image: gcr.io/mcback/solr-shard:latest
diff --git a/apps/munin-node/plugins/mc_postgresql_relfrozenxid_age b/apps/munin-node/plugins/mc_postgresql_relfrozenxid_age
new file mode 100755
index 0000000000..67f04d92f2
--- /dev/null
+++ b/apps/munin-node/plugins/mc_postgresql_relfrozenxid_age
@@ -0,0 +1,51 @@
+#!/usr/bin/env perl
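+# Munin plugin: reports the number of tables for which age(relfrozenxid) is above 1,000,000,000.
+# Background: https://blog.crunchydata.com/blog/managing-transaction-id-wraparound-in-postgresql
+#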
+
+use strict;
+use warnings;
+
+use FindBin;
+use lib $FindBin::Bin;
+
+use MediaWords::Munin;
+
+MediaWords::Munin::plugin({
+    title => 'Tables with old relfrozenxid',
+    info => 'Number of tables with old relfrozenxid',
+    vlabel => 'Tables',
+    metrics => [
+        {
+            label => 'Tables',
+            color => $COLOR1,
+
+            # Even a single table can mess up autovacuum
+            critical => { to => 1 },
+
+            value => sub {
+                
+                my $db = shift;
+
+                my $table_count = $db->query(<<SQL
+                    SELECT count(c.oid::regclass)
+                    FROM pg_class c
+                        JOIN pg_namespace n on c.relnamespace = n.oid
+                    WHERE relkind IN ('r', 't', 'm') 
+                      AND n.nspname NOT IN ('pg_toast')
+                      AND age(c.relfrozenxid) > 1000000000
+SQL
+                )->flat->[0];
+
+                unless ( defined $table_count ) {
+                    print STDERR "Unable to list tables with old relfrozenxid.\n";
+
+                    # Returning a non-zero value to trigger the "CRITICAL" alert
+                    return 1;
+                }
+
+                return $table_count;
+            },
+        },
+    ],
+});
diff --git a/apps/munin-node/plugins/mc_solr_sentences_last_day b/apps/munin-node/plugins/mc_solr_sentences_last_day
deleted file mode 100755
index dd33519b50..0000000000
--- a/apps/munin-node/plugins/mc_solr_sentences_last_day
+++ /dev/null
@@ -1,98 +0,0 @@
-#!/usr/bin/env perl
-
-use strict;
-use warnings;
-
-use FindBin;
-use lib $FindBin::Bin;
-
-use MediaWords::Munin;
-use JSON;
-use URI;
-use LWP::Simple qw($ua get);
-
-MediaWords::Munin::plugin({
-    title => 'Solr sentences in last day',
-    info => 'Solr imported sentences in the last day, as reported by Solr',
-    vlabel => 'Count',
-    metrics => [
-        {
-            label => 'Solr sentences',
-            color => $COLOR2,
-            warning => { from => sub {
-                if ( MediaWords::Munin::should_expect_weekend_traffic() ) {
-                    return 2_500_000;
-                } else {
-                    return 4_500_000;
-                }
-            } },
-            critical => { from => sub {
-                if ( MediaWords::Munin::should_expect_weekend_traffic() ) {
-                    return 2_000_000;
-                } else {
-                    return 4_000_000;
-                }
-            } },
-            value => sub {
-
-                my $db = shift;
-
-                my $solr_url = $ENV{ 'MC_SOLR_URL' };
-                unless ( defined $solr_url ) {
-                    say STDERR "Solr URL is not set, falling back to the default one.";
-                    $solr_url = 'http://127.0.0.1:8983/solr';
-                }
-
-                my $stories_id_from_last_day = $db->query(<<SQL
-                    SELECT stories_id AS stories_id_from_last_day
-                    FROM stories
-                    WHERE collect_date >= NOW() - '1 day'::interval
-                    ORDER BY collect_date
-                    LIMIT 1
-SQL
-                )->flat->[0];
-                unless ( defined $stories_id_from_last_day ) {
-                    print STDERR "No stories since yesterday\n";
-                    return 0;
-                }
-
-                my $solr_uri = URI->new( $solr_url )->canonical;
-                my @solr_path_segments = $solr_uri->path_segments;
-                push ( @solr_path_segments, 'collection1' );
-                push ( @solr_path_segments, 'select' );
-                $solr_uri->path_segments( @solr_path_segments );
-
-                $solr_uri->query_form(
-                    q => 'stories_id:[' . $stories_id_from_last_day . ' TO *]',
-                    rows => 0,
-                    wt => 'json',
-                    indent => 'true',
-                );
-
-                $ua->timeout( 10 );
-                my $response;
-                eval {
-                    $response = get( $solr_uri->as_string );
-                };
-                if ( $@ or (! $response )) {
-                    die "Unable to get response from Solr: $@\n";
-                }
-
-                my $json_response;
-                eval {
-                    $json_response = decode_json( $response );
-                };
-                if ( $@ or (! $json_response )) {
-                    die "Unable to decode JSON response: $@\n";
-                }
-
-                my $num_found = $json_response->{ response }->{ numFound };
-                unless ( defined $num_found ) {
-                    die "Unable to read /response/numFound key";
-                }
-                
-                return $num_found;
-            },
-        },
-    ],
-});
diff --git a/apps/munin-node/plugins/mc_websites_up b/apps/munin-node/plugins/mc_websites_up
index 8e66a06db8..869bd1f2fb 100755
--- a/apps/munin-node/plugins/mc_websites_up
+++ b/apps/munin-node/plugins/mc_websites_up
@@ -30,7 +30,7 @@ MediaWords::Munin::plugin({
                 my $response = $ua->get('https://api.mediacloud.org/api/v2/stories_public/list');
 
                 # Don't test whether request was successful (because it wasn't)
-                if ( $response->decoded_content =~ /Invalid API key or authentication cookie/i ) {
+                if ( $response->decoded_content =~ /Invalid API key/i ) {
                     # Up
                     return 1;
                 } else {
diff --git a/apps/nytlabels-annotator/.idea/misc.xml b/apps/nytlabels-annotator/.idea/misc.xml
index 62c91cd3b1..dd2f82cf96 100644
--- a/apps/nytlabels-annotator/.idea/misc.xml
+++ b/apps/nytlabels-annotator/.idea/misc.xml
@@ -3,5 +3,5 @@
   <component name="JavaScriptSettings">
     <option name="languageLevel" value="ES6" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.8.5 Docker (gcr.io/mcback/nytlabels-annotator:latest) (2)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.8.5 Docker (gcr.io/mcback/nytlabels-annotator:latest)" project-jdk-type="Python SDK" />
 </project>
\ No newline at end of file
diff --git a/apps/nytlabels-annotator/.idea/nytlabels-annotator.iml b/apps/nytlabels-annotator/.idea/nytlabels-annotator.iml
index f1dab97a30..3e3e8c191c 100644
--- a/apps/nytlabels-annotator/.idea/nytlabels-annotator.iml
+++ b/apps/nytlabels-annotator/.idea/nytlabels-annotator.iml
@@ -2,7 +2,7 @@
 <module type="PYTHON_MODULE" version="4">
   <component name="NewModuleRootManager">
     <content url="file://$MODULE_DIR$" />
-    <orderEntry type="jdk" jdkName="Remote Python 3.8.5 Docker (gcr.io/mcback/nytlabels-annotator:latest) (2)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Remote Python 3.8.5 Docker (gcr.io/mcback/nytlabels-annotator:latest)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="TestRunnerService">
diff --git a/apps/nytlabels-annotator/Dockerfile b/apps/nytlabels-annotator/Dockerfile
index 4cd13da3ba..0d33d8845b 100644
--- a/apps/nytlabels-annotator/Dockerfile
+++ b/apps/nytlabels-annotator/Dockerfile
@@ -5,6 +5,8 @@
 FROM gcr.io/mcback/base:latest
 
 RUN \
+    # FIXME remove once the base image gets updated
+    apt-get -y update && \
     #
     # Install model fetch dependencies
     apt-get -y --no-install-recommends install brotli && \
@@ -25,29 +27,29 @@ RUN /dl_to_stdout.sh "$MODEL_URL/GoogleNews-vectors-negative300.stripped.shelve.
 RUN /dl_to_stdout.sh "$MODEL_URL/scaler.onnx" > scaler.onnx
 
 RUN /dl_to_stdout.sh "$MODEL_URL/all_descriptors.onnx.br" | \
-        brotli -d > all_descriptors.onnx
+        brotli -d > allDescriptors.onnx
 RUN /dl_to_stdout.sh "$MODEL_URL/all_descriptors.txt.br" | \
-        brotli -d > all_descriptors.txt
+        brotli -d > allDescriptors.txt
 
 RUN /dl_to_stdout.sh "$MODEL_URL/descriptors_3000.onnx.br" | \
-        brotli -d > descriptors_3000.onnx
+        brotli -d > descriptors3000.onnx
 RUN /dl_to_stdout.sh "$MODEL_URL/descriptors_3000.txt.br" | \
-        brotli -d > descriptors_3000.txt
+        brotli -d > descriptors3000.txt
 
 RUN /dl_to_stdout.sh "$MODEL_URL/descriptors_600.onnx.br" | \
-        brotli -d > descriptors_600.onnx
+        brotli -d > descriptors600.onnx
 RUN /dl_to_stdout.sh "$MODEL_URL/descriptors_600.txt.br" | \
-        brotli -d > descriptors_600.txt
+        brotli -d > descriptors600.txt
 
 RUN /dl_to_stdout.sh "$MODEL_URL/descriptors_with_taxonomies.onnx.br" | \
-        brotli -d > descriptors_with_taxonomies.onnx
+        brotli -d > descriptorsAndTaxonomies.onnx
 RUN /dl_to_stdout.sh "$MODEL_URL/descriptors_with_taxonomies.txt.br" | \
-        brotli -d > descriptors_with_taxonomies.txt
+        brotli -d > descriptorsAndTaxonomies.txt
 
 RUN /dl_to_stdout.sh "$MODEL_URL/just_taxonomies.onnx.br" | \
-        brotli -d > just_taxonomies.onnx
+        brotli -d > taxonomies.onnx
 RUN /dl_to_stdout.sh "$MODEL_URL/just_taxonomies.txt.br" | \
-        brotli -d > just_taxonomies.txt
+        brotli -d > taxonomies.txt
 
 # Install NLTK data
 RUN \
@@ -84,6 +86,11 @@ RUN \
 WORKDIR /usr/src/crappy-predict-news-labels/
 COPY src/crappy-predict-news-labels/requirements.txt /usr/src/crappy-predict-news-labels/
 RUN \
+    #
+    # OpenMP for onnxruntime speed up
+    apt-get -y --no-install-recommends install libgomp1 && \
+    #
+    # The rest
     pip3 install -r requirements.txt && \
     rm -rf /root/.cache/ && \
     true
@@ -103,4 +110,4 @@ STOPSIGNAL SIGTERM
 
 USER nobody
 
-CMD ["nytlabels_http_server.py"]
+CMD ["nytlabels.sh"]
diff --git a/apps/nytlabels-annotator/README.md b/apps/nytlabels-annotator/README.md
index 33df58c79f..f20423ebe3 100644
--- a/apps/nytlabels-annotator/README.md
+++ b/apps/nytlabels-annotator/README.md
@@ -55,6 +55,14 @@ and then `POST` said file as JSON to the annotator:
 ```bash
 echo '{}' | \
   jq --arg key0 text --arg value0 "$(cat test.txt)" '. | .[$key0]=$value0' | \
-  curl --header "Content-Type: application/json" -X POST --data-binary @- http://127.0.0.1:8080/predict.json | \
+  curl --verbose --silent --trace-time --header "Content-Type: application/json" -X POST --data-binary @- http://127.0.0.1:8080/predict.json | \
   jq ".descriptors600"
 ```
+
+Alternatively, to try out just the `descriptors600` model:
+
+```bash
+echo '{"models": ["descriptors600"]}' | \
+  jq --arg key0 text --arg value0 "$(cat test.txt)" '. | .[$key0]=$value0' | \
+  curl --verbose --silent --trace-time --header "Content-Type: application/json" -X POST --data-binary @- http://127.0.0.1:8080/predict.json
+```
diff --git a/apps/nytlabels-annotator/src/crappy-predict-news-labels/nytlabels.py b/apps/nytlabels-annotator/src/crappy-predict-news-labels/nytlabels.py
index c07629c68c..d7a2911689 100644
--- a/apps/nytlabels-annotator/src/crappy-predict-news-labels/nytlabels.py
+++ b/apps/nytlabels-annotator/src/crappy-predict-news-labels/nytlabels.py
@@ -1,7 +1,8 @@
 import dataclasses
+import multiprocessing
 import os
 import shelve
-from typing import List
+from typing import List, Optional
 
 from nltk.data import load as load_nltk_data
 from nltk.tokenize.destructive import NLTKWordTokenizer
@@ -87,13 +88,28 @@ class MultiLabelPredict(object):
         '_embedding_size',
     ]
 
-    def __init__(self, model_path: str, labels_path: str):
+    def __init__(self, model_path: str, labels_path: str, num_threads: Optional[int] = None):
         if not os.path.isfile(model_path):
             raise RuntimeError(f"Model was not found in {model_path}")
         if not os.path.isfile(labels_path):
             raise RuntimeError(f"Model labels were not found in {labels_path}")
 
-        self._model = onnxruntime.InferenceSession(model_path)
+        if num_threads is None:
+            num_threads = multiprocessing.cpu_count()
+
+        options = onnxruntime.SessionOptions()
+        options.execution_mode = onnxruntime.ExecutionMode.ORT_SEQUENTIAL
+
+        # Seems to be slightly slower:
+        # options.execution_mode = onnxruntime.ExecutionMode.ORT_PARALLEL
+
+        # Not really used without ORT_PARALLEL:
+        options.inter_op_num_threads = num_threads
+        options.intra_op_num_threads = num_threads
+
+        options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL
+        # Hand the options to the session; otherwise everything configured above is silently ignored
+        self._model = onnxruntime.InferenceSession(path_or_bytes=model_path, sess_options=options)
         self._labels = open(labels_path, 'r').read().splitlines()
 
         _, self._sample_length, self._embedding_size = self._model.get_inputs()[0].shape
diff --git a/apps/nytlabels-annotator/src/crappy-predict-news-labels/nytlabels.sh b/apps/nytlabels-annotator/src/crappy-predict-news-labels/nytlabels.sh
new file mode 100755
index 0000000000..73263c2105
--- /dev/null
+++ b/apps/nytlabels-annotator/src/crappy-predict-news-labels/nytlabels.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+# Start the NYTLabels HTTP server, passing the output of /container_cpu_limit.sh as --num_threads.
+set -u
+set -e
+
+# Deliberately not named PWD: assigning to PWD would clobber the shell's own working directory variable
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+exec "$SCRIPT_DIR/nytlabels_http_server.py" --num_threads "$(/container_cpu_limit.sh)"
diff --git a/apps/nytlabels-annotator/src/crappy-predict-news-labels/nytlabels_http_server.py b/apps/nytlabels-annotator/src/crappy-predict-news-labels/nytlabels_http_server.py
index 0f0205ae41..951de5e6b5 100755
--- a/apps/nytlabels-annotator/src/crappy-predict-news-labels/nytlabels_http_server.py
+++ b/apps/nytlabels-annotator/src/crappy-predict-news-labels/nytlabels_http_server.py
@@ -4,41 +4,35 @@
 NYTLabels annotator HTTP service.
 """
 
-import dataclasses
+import argparse
 import json
-import operator
 import os
+import pprint
 from http import HTTPStatus
 from http.server import HTTPServer, BaseHTTPRequestHandler
-from sys import argv
-from typing import Union, Dict, List
+from typing import Union, Dict, List, Optional, Type
 
 from self_test_input import SELF_TEST_INPUT
 
 from nytlabels import Text2ScaledVectors, MultiLabelPredict
 
-
-@dataclasses.dataclass(frozen=True)
-class _ModelDescriptor(object):
-    basename: str
-    json_key: str
+# For each key there must exist a model ONNX file and a list of labels with a given basename
+ALL_MODELS = [
+    'allDescriptors',
+    'descriptors3000',
+    'descriptors600',
+    'descriptorsAndTaxonomies',
+    'taxonomies',
+]
 
 
 class _Predictor(object):
     __slots__ = [
-        'text2vectors',
-        'models',
-    ]
-
-    _MODEL_DESCRIPTORS = [
-        _ModelDescriptor(basename='all_descriptors', json_key='allDescriptors'),
-        _ModelDescriptor(basename='descriptors_3000', json_key='descriptors3000'),
-        _ModelDescriptor(basename='descriptors_600', json_key='descriptors600'),
-        _ModelDescriptor(basename='descriptors_with_taxonomies', json_key='descriptorsAndTaxonomies'),
-        _ModelDescriptor(basename='just_taxonomies', json_key='taxonomies'),
+        '__text2vectors',
+        '__models',
     ]
 
-    def __init__(self):
+    def __init__(self, num_threads: Optional[int]):
 
         pwd = os.path.dirname(os.path.abspath(__file__))
         models_dir = os.path.join(pwd, 'models')
@@ -46,24 +40,25 @@ def __init__(self):
             raise RuntimeError(f"Models path should be directory: {models_dir}")
 
         print("Loading scaler and word2vec...")
-        self.text2vectors = Text2ScaledVectors(
+        self.__text2vectors = Text2ScaledVectors(
             word2vec_shelve_path=os.path.join(models_dir, 'GoogleNews-vectors-negative300.stripped.shelve'),
             scaler_path=os.path.join(models_dir, 'scaler.onnx'),
         )
         print("Scaler and word2vec loaded.")
 
         print("Loading models...")
-        self.models = dict()
+        self.__models = dict()
 
         # Make sure all models have the sample sample length and embedding size as we vector text only once
         sample_length = None
         embedding_size = None
 
-        for model_descriptor in self._MODEL_DESCRIPTORS:
-            print(f"    Loading '{model_descriptor.basename}'...")
+        for model_name in ALL_MODELS:
+            print(f"    Loading '{model_name}'...")
             model = MultiLabelPredict(
-                model_path=os.path.join(models_dir, f"{model_descriptor.basename}.onnx"),
-                labels_path=os.path.join(models_dir, f"{model_descriptor.basename}.txt"),
+                model_path=os.path.join(models_dir, f"{model_name}.onnx"),
+                labels_path=os.path.join(models_dir, f"{model_name}.txt"),
+                num_threads=num_threads,
             )
 
             if sample_length and embedding_size:
@@ -72,39 +67,82 @@ def __init__(self):
                 sample_length = model.sample_length()
                 embedding_size = model.embedding_size()
 
-            self.models[model_descriptor] = model
+            self.__models[model_name] = model
         print("Models loaded.")
 
         print("Running self-test...\n")
-        vectors = self.text2vectors.transform(
-            text=SELF_TEST_INPUT,
+        test_result = self.predict(text=SELF_TEST_INPUT, enabled_model_names=ALL_MODELS)
+        pp = pprint.PrettyPrinter(indent=4, width=1024)
+        pp.pprint(test_result)
+        print("Done running self-test.")
+
+    def predict(self, text: str, enabled_model_names: List[str]) -> Dict[str, List[Dict[str, str]]]:
+
+        # Sample length / embedding size is the same for all models
+        first_model = self.__models[list(self.__models.keys())[0]]
+        sample_length = first_model.sample_length()
+        embedding_size = first_model.embedding_size()
+
+        vectors = self.__text2vectors.transform(
+            text=text,
             sample_length=sample_length,
             embedding_size=embedding_size,
         )
-        for model_descriptor in sorted(self._MODEL_DESCRIPTORS, key=operator.attrgetter('basename')):
-            print(f"Model '{model_descriptor.basename}':")
-            model = self.models[model_descriptor]
+
+        result = dict()
+
+        for model_name in enabled_model_names:
+            model = self.__models[model_name]
             predictions = model.predict(x_matrix=vectors)
-            for prediction in predictions:
-                print(f"  * Label: {prediction.label}, score: {prediction.score:.6f}")
-            assert len(predictions), f"Some predictions should be returned by {model.__class__.__name__}"
-            print()
-        print("Done running self-test.")
+            result[model_name] = [
+                {'label': x.label, 'score': "{0:.5f}".format(x.score)} for x in predictions
+            ]
+
+        return result
 
 
 # noinspection PyPep8Naming
 class NYTLabelsRequestHandler(BaseHTTPRequestHandler):
-    _PREDICTOR = _Predictor()
+    # Allow HTTP/1.1 connections and so don't wait up on "Expect:" headers
+    protocol_version = "HTTP/1.1"
+
+    _PREDICTOR = None
+
+    @classmethod
+    def initialize_predictor(cls, num_threads: Optional[int]) -> None:
+        assert not cls._PREDICTOR, "Predictor is already initialized."
+        cls._PREDICTOR = _Predictor(num_threads=num_threads)
+
+    def __init__(self, *args, **kwargs):
+        assert self._PREDICTOR, "You need to initialize the predictor before setting this class as a request handler."
+        super(NYTLabelsRequestHandler, self).__init__(*args, **kwargs)
 
     def __respond(self, http_status: int, response: Union[dict, list]):
+        raw_response = json.dumps(response).encode('utf-8')
         self.send_response(http_status)
         self.send_header('Content-Type', 'application/json; charset=UTF-8')
+        self.send_header('Content-Length', str(len(raw_response)))
         self.end_headers()
-        self.wfile.write(json.dumps(response).encode('utf-8'))
+        self.wfile.write(raw_response)
 
     def __respond_with_error(self, http_status: int, message: str):
         self.__respond(http_status=http_status, response={'error': message})
 
+    # If the request handler's protocol_version is set to "HTTP/1.0" (the default) and the client tries connecting via
+    # HTTP/1.1 and sends an "Expect: 100-continue" header, the client will then wait for a bit (curl waits for a second)
+    # for "100 Continue" which the server will never send (due to it being configured to support HTTP/1.0 only),
+    # therefore the whole request will take one whole second longer. This method only logs a warning in that case.
+    #
+    # Please note that when enabling HTTP/1.1, one has to send Content-Length in their responses.
+    def __check_expect_header(self):
+        expect = self.headers.get('Expect', "")
+        if expect.lower() == "100-continue":
+            if not (self.protocol_version >= "HTTP/1.1" and self.request_version >= "HTTP/1.1"):
+                print((
+                    "WARNING: due to server / client misconfiguration, client sent Expect: header "
+                    "and is waiting for a response, possibly delaying the whole request."
+                ))
+
     def do_GET(self):
         # noinspection PyUnresolvedReferences
         self.__respond_with_error(http_status=HTTPStatus.BAD_REQUEST.value, message='GET requests are not supported.')
@@ -113,30 +151,10 @@ def do_HEAD(self):
         # noinspection PyUnresolvedReferences
         self.__respond_with_error(http_status=HTTPStatus.BAD_REQUEST.value, message='HEAD requests are not supported.')
 
-    def _predict(self, text: str) -> Dict[str, List[Dict[str, str]]]:
-
-        # Sample length / embedding size is the same for all models
-        first_model = self._PREDICTOR.models[list(self._PREDICTOR.models.keys())[0]]
-        sample_length = first_model.sample_length()
-        embedding_size = first_model.embedding_size()
-
-        vectors = self._PREDICTOR.text2vectors.transform(
-            text=text,
-            sample_length=sample_length,
-            embedding_size=embedding_size,
-        )
-
-        result = dict()
-
-        for model_descriptor, model in self._PREDICTOR.models.items():
-            predictions = model.predict(x_matrix=vectors)
-            result[model_descriptor.json_key] = [
-                {'label': x.label, 'score': "{0:.5f}".format(x.score)} for x in predictions
-            ]
+    def do_POST(self):
 
-        return result
+        self.__check_expect_header()
 
-    def do_POST(self):
         content_length = int(self.headers.get('Content-Length', 0))
         if not content_length:
             # noinspection PyUnresolvedReferences
@@ -174,8 +192,39 @@ def do_POST(self):
             )
             return
 
+        models = payload.get('models', None)
+        if models is None:
+            enabled_model_names = ALL_MODELS
+        else:
+            enabled_model_names = []
+            # Wrap a non-list value so it gets validated as a single name instead of raising an uncaught TypeError
+            for model_name in (models if isinstance(models, list) else [models]):
+                if model_name not in ALL_MODELS:
+                    # noinspection PyUnresolvedReferences
+                    self.__respond_with_error(
+                        http_status=HTTPStatus.BAD_REQUEST.value,
+                        message=f"Model '{model_name}' was not found.",
+                    )
+                    return
+                if model_name in enabled_model_names:
+                    # noinspection PyUnresolvedReferences
+                    self.__respond_with_error(
+                        http_status=HTTPStatus.BAD_REQUEST.value,
+                        message=f"Model '{model_name}' is duplicate.",
+                    )
+                    return
+                enabled_model_names.append(model_name)
+
+        if not enabled_model_names:
+            # noinspection PyUnresolvedReferences
+            self.__respond_with_error(
+                http_status=HTTPStatus.BAD_REQUEST.value,
+                message="List of enabled models is empty.",
+            )
+            return
+
         try:
-            result = self._predict(text)
+            result = self._PREDICTOR.predict(text=text, enabled_model_names=enabled_model_names)
         except Exception as ex:
             # noinspection PyUnresolvedReferences
             self.__respond_with_error(
@@ -187,15 +236,29 @@ def do_POST(self):
         self.__respond(http_status=HTTPStatus.OK, response=result)
 
 
-def run(port: int = 8080):
-    server_address = ('', port)
-    httpd = HTTPServer(server_address, NYTLabelsRequestHandler)
-    print(f'Starting NYTLabels annotator on port {port}...')
+def make_nytlabels_request_handler_class(num_threads: Optional[int]) -> Type[NYTLabelsRequestHandler]:
+    class CustomNYTLabelsRequestHandler(NYTLabelsRequestHandler):
+        pass
+
+    CustomNYTLabelsRequestHandler.initialize_predictor(num_threads=num_threads)
+
+    return CustomNYTLabelsRequestHandler
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Start NYTLabels annotator web service.")
+    parser.add_argument("-p", "--port", type=int, required=False, default=8080,
+                        help="Port to listen to")
+    parser.add_argument("-t", "--num_threads", type=int, required=False,
+                        help="Threads that the model runtime should spawn")
+    args = parser.parse_args()
+
+    server_address = ('', args.port)
+    handler_class = make_nytlabels_request_handler_class(num_threads=args.num_threads)
+    httpd = HTTPServer(server_address, handler_class)
+    print(f'Starting NYTLabels annotator on port {args.port}...')
     httpd.serve_forever()
 
 
 if __name__ == "__main__":
-    if len(argv) == 2:
-        run(port=int(argv[1]))
-    else:
-        run()
+    main()
diff --git a/apps/nytlabels-fetch-annotation-and-tag/.dockerignore b/apps/nytlabels-fetch-annotation-and-tag/.dockerignore
index 752414ae9c..9b2c362a80 100644
--- a/apps/nytlabels-fetch-annotation-and-tag/.dockerignore
+++ b/apps/nytlabels-fetch-annotation-and-tag/.dockerignore
@@ -89,3 +89,4 @@ sdist
 Temporary Items
 wheels
 _Inline
+
diff --git a/apps/nytlabels-fetch-annotation-and-tag/.idea/mediawords.sql b/apps/nytlabels-fetch-annotation-and-tag/.idea/mediawords.sql
new file mode 120000
index 0000000000..08fc9a64b9
--- /dev/null
+++ b/apps/nytlabels-fetch-annotation-and-tag/.idea/mediawords.sql
@@ -0,0 +1 @@
+../../postgresql-server/schema/mediawords.sql
\ No newline at end of file
diff --git a/apps/nytlabels-fetch-annotation-and-tag/.idea/misc.xml b/apps/nytlabels-fetch-annotation-and-tag/.idea/misc.xml
index bd61294576..2ac35808ab 100644
--- a/apps/nytlabels-fetch-annotation-and-tag/.idea/misc.xml
+++ b/apps/nytlabels-fetch-annotation-and-tag/.idea/misc.xml
@@ -1,4 +1,4 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
-  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.7.9 Docker Compose (nytlabels-fetch-annotation-and-tag at [/Users/jamesotoole/mediacloud/backend/apps/nytlabels-fetch-annotation-and-tag/docker-compose.tests.yml])" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.8.5 Docker Compose (nytlabels-fetch-annotation-and-tag at [/home/pypt/m/apps/nytlabels-fetch-annotation-and-tag/docker-compose.tests.yml])" project-jdk-type="Python SDK" />
 </project>
\ No newline at end of file
diff --git a/apps/nytlabels-fetch-annotation-and-tag/.idea/nytlabels-fetch-annotation-and-tag.iml b/apps/nytlabels-fetch-annotation-and-tag/.idea/nytlabels-fetch-annotation-and-tag.iml
index 10163454cb..9a2244a452 100644
--- a/apps/nytlabels-fetch-annotation-and-tag/.idea/nytlabels-fetch-annotation-and-tag.iml
+++ b/apps/nytlabels-fetch-annotation-and-tag/.idea/nytlabels-fetch-annotation-and-tag.iml
@@ -2,7 +2,7 @@
 <module type="PYTHON_MODULE" version="4">
   <component name="NewModuleRootManager">
     <content url="file://$MODULE_DIR$" />
-    <orderEntry type="jdk" jdkName="Remote Python 3.7.9 Docker Compose (nytlabels-fetch-annotation-and-tag at [/Users/jamesotoole/mediacloud/backend/apps/nytlabels-fetch-annotation-and-tag/docker-compose.tests.yml])" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Remote Python 3.8.5 Docker Compose (nytlabels-fetch-annotation-and-tag at [/home/pypt/m/apps/nytlabels-fetch-annotation-and-tag/docker-compose.tests.yml])" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PackageRequirementsSettings">
diff --git a/apps/nytlabels-fetch-annotation-and-tag/.idea/sqlDataSources.xml b/apps/nytlabels-fetch-annotation-and-tag/.idea/sqlDataSources.xml
new file mode 100644
index 0000000000..db4a8197d2
--- /dev/null
+++ b/apps/nytlabels-fetch-annotation-and-tag/.idea/sqlDataSources.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="SqlDataSourceStorage">
+    <option name="dataSources">
+      <list>
+        <State>
+          <option name="id" value="701eb757-a51b-4c25-ba82-c56193a971a2" />
+          <option name="name" value="DDL data source" />
+          <option name="urls">
+            <array>
+              <option value="file://$PROJECT_DIR$/.idea/mediawords.sql" />
+            </array>
+          </option>
+        </State>
+      </list>
+    </option>
+  </component>
+</project>
\ No newline at end of file
diff --git a/apps/nytlabels-fetch-annotation-and-tag/docker-compose.tests.yml b/apps/nytlabels-fetch-annotation-and-tag/docker-compose.tests.yml
index bcabb80022..785e7aacb4 100644
--- a/apps/nytlabels-fetch-annotation-and-tag/docker-compose.tests.yml
+++ b/apps/nytlabels-fetch-annotation-and-tag/docker-compose.tests.yml
@@ -52,5 +52,5 @@ services:
               source: ./../postgresql-server/schema/
               target: /opt/mediacloud/schema/
             - type: bind
-              source: ./../postgresql-server/conf/
-              target: /etc/postgresql/11/main/
+              source: ./../postgresql-base/conf/
+              target: /etc/postgresql/13/main/
diff --git a/apps/nytlabels-fetch-annotation-and-tag/src/python/nytlabels_fetch_annotation_and_tag/nytlabels_tags_from_annotation.py b/apps/nytlabels-fetch-annotation-and-tag/src/python/nytlabels_fetch_annotation_and_tag/nytlabels_tags_from_annotation.py
index afae2e395f..848991973a 100644
--- a/apps/nytlabels-fetch-annotation-and-tag/src/python/nytlabels_fetch_annotation_and_tag/nytlabels_tags_from_annotation.py
+++ b/apps/nytlabels-fetch-annotation-and-tag/src/python/nytlabels_fetch_annotation_and_tag/nytlabels_tags_from_annotation.py
@@ -13,6 +13,9 @@
 class NYTLabelsTagsFromAnnotation(TagsFromJSONAnnotation):
     """Fetches NYT labels annotation and uses it to generate/store story tags."""
 
+    # Specific model to run the input text against
+    _ENABLED_MODEL = 'descriptors600'
+
     # NYTLabels version tag set
     __NYTLABELS_VERSION_TAG_SET = 'nyt_labels_version'
 
@@ -36,7 +39,7 @@ def _request_for_text(self, text: str) -> Request:
         # Create JSON request
         log.debug("Converting text to JSON request...")
         try:
-            text_json = encode_json({'text': text})
+            text_json = encode_json({'text': text, 'models': [self._ENABLED_MODEL]})
         except Exception as ex:
             # Not critical, might happen to some stories, no need to shut down the annotator
             raise McTagsFromJSONAnnotationException(
@@ -66,8 +69,8 @@ def _fetched_annotation_is_valid(self, annotation: Union[dict, list]) -> bool:
             log.warning("Annotation is not dict: %s" % str(annotation))
             return False
 
-        if 'descriptors600' not in annotation:
-            log.warning("Annotation doesn't have 'descriptors600' key: %s" % str(annotation))
+        if self._ENABLED_MODEL not in annotation:
+            log.warning(f"Annotation doesn't have '{self._ENABLED_MODEL}' key: {annotation}")
             return False
 
         return True
@@ -93,10 +96,10 @@ def _tags_for_annotation(self, annotation: Union[dict, list]) -> List[TagsFromJS
                                                tags_label=nytlabels_version_tag,
                                                tags_description="Story was tagged with '%s'" % nytlabels_version_tag))
 
-        descriptors600 = annotation.get('descriptors600', None)
-        if descriptors600 is not None and len(descriptors600) > 0:
+        descriptors = annotation.get(self._ENABLED_MODEL, None)
+        if descriptors is not None and len(descriptors) > 0:
 
-            for descriptor in descriptors600:
+            for descriptor in descriptors:
 
                 label = descriptor['label']
                 score = float(descriptor['score'])
diff --git a/apps/nytlabels-fetch-annotation-and-tag/tests/python/test_nytlabels_tags_from_annotation.py b/apps/nytlabels-fetch-annotation-and-tag/tests/python/test_nytlabels_tags_from_annotation.py
index 2ee0c9d654..b8027b64e9 100644
--- a/apps/nytlabels-fetch-annotation-and-tag/tests/python/test_nytlabels_tags_from_annotation.py
+++ b/apps/nytlabels-fetch-annotation-and-tag/tests/python/test_nytlabels_tags_from_annotation.py
@@ -6,6 +6,7 @@
 from mediawords.util.network import random_unused_port
 from mediawords.util.parse_json import encode_json
 from mediawords.util.sql import sql_now
+
 from nytlabels_fetch_annotation_and_tag.config import NYTLabelsTagsFromAnnotationConfig
 from nytlabels_fetch_annotation_and_tag.nytlabels_tags_from_annotation import NYTLabelsTagsFromAnnotation
 from nytlabels_fetch_annotation_and_tag.sample_data import sample_nytlabels_response, expected_nytlabels_tags
@@ -14,7 +15,6 @@
 class TestNYTLabelsTagsFromAnnotation(TestCase):
 
     def test_tagging(self):
-
         db = connect_to_db()
 
         media = db.create(table='media', insert_hash={
diff --git a/apps/podcast-fetch-episode/.idea/misc.xml b/apps/podcast-fetch-episode/.idea/misc.xml
deleted file mode 100644
index 64bb3a0baa..0000000000
--- a/apps/podcast-fetch-episode/.idea/misc.xml
+++ /dev/null
@@ -1,7 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="JavaScriptSettings">
-    <option name="languageLevel" value="ES6" />
-  </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.8.5 Docker Compose (podcast-fetch-episode at [/home/pypt/m/apps/podcast-fetch-episode/docker-compose.tests.yml])" project-jdk-type="Python SDK" />
-</project>
\ No newline at end of file
diff --git a/apps/podcast-fetch-episode/.idea/podcast-fetch-episode.iml b/apps/podcast-fetch-episode/.idea/podcast-fetch-episode.iml
deleted file mode 100644
index 526ab95d93..0000000000
--- a/apps/podcast-fetch-episode/.idea/podcast-fetch-episode.iml
+++ /dev/null
@@ -1,17 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<module type="PYTHON_MODULE" version="4">
-  <component name="NewModuleRootManager">
-    <content url="file://$MODULE_DIR$">
-      <sourceFolder url="file://$MODULE_DIR$/src" isTestSource="false" />
-      <sourceFolder url="file://$MODULE_DIR$/bin" isTestSource="false" />
-    </content>
-    <orderEntry type="jdk" jdkName="Remote Python 3.8.5 Docker Compose (podcast-fetch-episode at [/home/pypt/m/apps/podcast-fetch-episode/docker-compose.tests.yml])" jdkType="Python SDK" />
-    <orderEntry type="sourceFolder" forTests="false" />
-  </component>
-  <component name="PackageRequirementsSettings">
-    <option name="requirementsPath" value="$MODULE_DIR$/src/requirements.txt" />
-  </component>
-  <component name="TestRunnerService">
-    <option name="PROJECT_TEST_RUNNER" value="pytest" />
-  </component>
-</module>
\ No newline at end of file
diff --git a/apps/podcast-fetch-episode/.idea/sqldialects.xml b/apps/podcast-fetch-episode/.idea/sqldialects.xml
deleted file mode 100644
index 790b3f37f8..0000000000
--- a/apps/podcast-fetch-episode/.idea/sqldialects.xml
+++ /dev/null
@@ -1,7 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="SqlDialectMappings">
-    <file url="file://$PROJECT_DIR$/../postgresql-server/schema/mediawords.sql" dialect="PostgreSQL" />
-    <file url="PROJECT" dialect="PostgreSQL" />
-  </component>
-</project>
\ No newline at end of file
diff --git a/apps/podcast-fetch-episode/.idea/webResources.xml b/apps/podcast-fetch-episode/.idea/webResources.xml
deleted file mode 100644
index c30bda4153..0000000000
--- a/apps/podcast-fetch-episode/.idea/webResources.xml
+++ /dev/null
@@ -1,14 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="WebResourcesPaths">
-    <contentEntries>
-      <entry url="file://$PROJECT_DIR$">
-        <entryData>
-          <resourceRoots>
-            <path value="file://$PROJECT_DIR$/tests/data" />
-          </resourceRoots>
-        </entryData>
-      </entry>
-    </contentEntries>
-  </component>
-</project>
\ No newline at end of file
diff --git a/apps/podcast-fetch-episode/Dockerfile b/apps/podcast-fetch-episode/Dockerfile
deleted file mode 100644
index 6bb28d4eb4..0000000000
--- a/apps/podcast-fetch-episode/Dockerfile
+++ /dev/null
@@ -1,29 +0,0 @@
-#
-# Fetch podcast episode from story, read metadata, store it to GCS
-#
-
-FROM gcr.io/mcback/common:latest
-
-# Install FFmpeg for manipulating audio files
-RUN apt-get -y --no-install-recommends install ffmpeg
-
-# Install Python dependencies
-COPY src/requirements.txt /var/tmp/
-RUN \
-    cd /var/tmp/ && \
-    pip3 install -r requirements.txt && \
-    rm requirements.txt && \
-    rm -rf /root/.cache/ && \
-    true
-
-# Copy sources
-COPY src/ /opt/mediacloud/src/podcast-fetch-episode/
-ENV PERL5LIB="/opt/mediacloud/src/podcast-fetch-episode/perl:${PERL5LIB}" \
-    PYTHONPATH="/opt/mediacloud/src/podcast-fetch-episode/python:${PYTHONPATH}"
-
-# Copy worker script
-COPY bin /opt/mediacloud/bin
-
-USER mediacloud
-
-CMD ["podcast_fetch_episode_worker.py"]
diff --git a/apps/podcast-fetch-episode/bin/podcast_fetch_episode_worker.py b/apps/podcast-fetch-episode/bin/podcast_fetch_episode_worker.py
deleted file mode 100755
index e8fc7ec433..0000000000
--- a/apps/podcast-fetch-episode/bin/podcast_fetch_episode_worker.py
+++ /dev/null
@@ -1,44 +0,0 @@
-#!/usr/bin/env python3
-
-from mediawords.db import connect_to_db
-from mediawords.job import JobBroker
-from mediawords.util.log import create_logger
-from mediawords.util.perl import decode_object_from_bytes_if_needed
-from mediawords.util.process import fatal_error
-
-from podcast_fetch_episode.exceptions import McPodcastFetchEpisodeSoftException
-from podcast_fetch_episode.fetch_and_store import fetch_and_store_episode
-
-log = create_logger(__name__)
-
-
-def run_podcast_fetch_episode(stories_id: int) -> None:
-    """Fetch podcast episode for story, upload it to GCS."""
-
-    if isinstance(stories_id, bytes):
-        stories_id = decode_object_from_bytes_if_needed(stories_id)
-    stories_id = int(stories_id)
-
-    db = connect_to_db()
-
-    log.info(f"Fetching podcast episode for story {stories_id}...")
-
-    try:
-        fetch_and_store_episode(db=db, stories_id=stories_id)
-
-        JobBroker(queue_name='MediaWords::Job::Podcast::SubmitOperation').add_to_queue(stories_id=stories_id)
-
-    except McPodcastFetchEpisodeSoftException as ex:
-        # Soft exceptions
-        log.error(f"Unable to fetch podcast episode for story {stories_id}: {ex}")
-        raise ex
-    except Exception as ex:
-        # Hard and other exceptions
-        fatal_error(f"Fatal / unknown error while fetching podcast episode for story {stories_id}: {ex}")
-
-    log.info(f"Done fetching podcast episode for story {stories_id}")
-
-
-if __name__ == '__main__':
-    app = JobBroker(queue_name='MediaWords::Job::Podcast::FetchEpisode')
-    app.start_worker(handler=run_podcast_fetch_episode)
diff --git a/apps/podcast-fetch-episode/docker-compose.tests.yml b/apps/podcast-fetch-episode/docker-compose.tests.yml
deleted file mode 100644
index 599c00c076..0000000000
--- a/apps/podcast-fetch-episode/docker-compose.tests.yml
+++ /dev/null
@@ -1,59 +0,0 @@
-version: "3.7"
-
-services:
-
-    podcast-fetch-episode:
-        image: gcr.io/mcback/podcast-fetch-episode:latest
-        init: true
-        stop_signal: SIGKILL
-        environment:
-            MC_PODCAST_GC_AUTH_JSON_BASE64: "${MC_PODCAST_GC_AUTH_JSON_BASE64}"
-            MC_PODCAST_FETCH_EPISODE_BUCKET_NAME: "${MC_PODCAST_FETCH_EPISODE_BUCKET_NAME}"
-            # Dev/test environments don't use "MC_PODCAST_FETCH_EPISODE_PATH_PREFIX" environment
-            # variable as they create a different, timestamped prefix for every test run.
-        volumes:
-            - type: bind
-              source: ./bin/
-              target: /opt/mediacloud/bin/
-            - type: bind
-              source: ./src/
-              target: /opt/mediacloud/src/podcast-fetch-episode/
-            - type: bind
-              source: ./tests/
-              target: /opt/mediacloud/tests/
-            - type: bind
-              source: ./../common/src/
-              target: /opt/mediacloud/src/common/
-        depends_on:
-            - postgresql-pgbouncer
-            # We don't need "rabbitmq-server" to run tests
-
-    postgresql-pgbouncer:
-        image: gcr.io/mcback/postgresql-pgbouncer:latest
-        init: true
-        stop_signal: SIGKILL
-        expose:
-            - 6432
-        volumes:
-            - type: bind
-              source: ./../postgresql-pgbouncer/conf/
-              target: /etc/pgbouncer/
-        depends_on:
-            - postgresql-server
-
-    postgresql-server:
-        image: gcr.io/mcback/postgresql-server:latest
-        init: true
-        stop_signal: SIGKILL
-        expose:
-            - 5432
-        volumes:
-            - type: bind
-              source: ./../postgresql-server/bin/
-              target: /opt/mediacloud/bin/
-            - type: bind
-              source: ./../postgresql-server/schema/
-              target: /opt/mediacloud/schema/
-            - type: bind
-              source: ./../postgresql-server/conf/
-              target: /etc/postgresql/11/main/
diff --git a/apps/podcast-fetch-episode/src/python/podcast_fetch_episode/config.py b/apps/podcast-fetch-episode/src/python/podcast_fetch_episode/config.py
deleted file mode 100644
index 05c7d028af..0000000000
--- a/apps/podcast-fetch-episode/src/python/podcast_fetch_episode/config.py
+++ /dev/null
@@ -1,22 +0,0 @@
-from mediawords.util.config import env_value, file_with_env_value
-
-
-class PodcastFetchEpisodeConfig(object):
-    """
-    Podcast episode fetcher configuration.
-    """
-
-    @staticmethod
-    def gc_auth_json_file() -> str:
-        """Return path to Google Cloud authentication JSON file."""
-        return file_with_env_value(name='MC_PODCAST_GC_AUTH_JSON_BASE64', encoded_with_base64=True)
-
-    @staticmethod
-    def gc_storage_bucket_name() -> str:
-        """Return Google Cloud Storage bucket name."""
-        return env_value(name='MC_PODCAST_FETCH_EPISODE_BUCKET_NAME')
-
-    @staticmethod
-    def gc_storage_path_prefix() -> str:
-        """Return Google Cloud Storage path prefix under which objects will be stored."""
-        return env_value(name='MC_PODCAST_FETCH_EPISODE_PATH_PREFIX')
diff --git a/apps/podcast-fetch-episode/src/python/podcast_fetch_episode/exceptions.py b/apps/podcast-fetch-episode/src/python/podcast_fetch_episode/exceptions.py
deleted file mode 100644
index 9c95054ffd..0000000000
--- a/apps/podcast-fetch-episode/src/python/podcast_fetch_episode/exceptions.py
+++ /dev/null
@@ -1,79 +0,0 @@
-import abc
-
-
-class _AbstractMcPodcastFetchEpisodeException(Exception, metaclass=abc.ABCMeta):
-    """Abstract exception."""
-    pass
-
-
-class McPodcastFetchEpisodeSoftException(_AbstractMcPodcastFetchEpisodeException):
-    """Soft errors exception."""
-    pass
-
-
-class McStoryNotFoundException(McPodcastFetchEpisodeSoftException):
-    """Exception raised when story was not found."""
-    pass
-
-
-class McPodcastNoViableStoryEnclosuresException(McPodcastFetchEpisodeSoftException):
-    """Exception thrown when story has no viable enclosures to choose from."""
-    pass
-
-
-class McPodcastEnclosureTooBigException(McPodcastFetchEpisodeSoftException):
-    """Exception thrown when story's best viable enclosure is too big."""
-    pass
-
-
-class McPodcastFileFetchFailureException(McPodcastFetchEpisodeSoftException):
-    """Exception thrown when we're unable to fetch the downloaded file for whatever reason."""
-    pass
-
-
-class McPodcastFileIsInvalidException(McPodcastFetchEpisodeSoftException):
-    """Exception thrown when the fetched file is not something that we can process for whatever reason."""
-    pass
-
-
-# ---
-
-class McPodcastFetchEpisodeHardException(_AbstractMcPodcastFetchEpisodeException):
-    """Hard errors exception."""
-    pass
-
-
-class McPodcastFileStoreFailureException(McPodcastFetchEpisodeHardException):
-    """
-    Exception thrown when we're unable to store the downloaded file for whatever reason.
-
-    This is a hard exception as not being able to store a file means that we might be out of disk space or something
-    like that.
-    """
-    pass
-
-
-class McPodcastGCSStoreFailureException(McPodcastFetchEpisodeHardException):
-    """
-    Exception thrown when we're unable to store an object to Google Cloud Storage.
-
-    GCS problems, if any, are probably temporary, but still, in those cases we should retry a few times and then give up
-    permanently because not being able to store stuff to GCS might mean that we ran out of some sort of a limit,
-    credentials are wrong, etc.
-    """
-    pass
-
-
-class McPodcastMisconfiguredTranscoderException(McPodcastFetchEpisodeHardException):
-    """Exception thrown when something happens with the transcoder that we didn't anticipate before."""
-    pass
-
-
-class McPodcastMisconfiguredGCSException(McPodcastFetchEpisodeHardException):
-    """Exception thrown when something happens with Google Cloud Storage that we didn't anticipate before."""
-    pass
-
-
-class McPodcastPostgreSQLException(McPodcastFetchEpisodeHardException):
-    """Exception thrown on PostgreSQL errors."""
-    pass
diff --git a/apps/podcast-fetch-episode/src/python/podcast_fetch_episode/fetch_and_store.py b/apps/podcast-fetch-episode/src/python/podcast_fetch_episode/fetch_and_store.py
deleted file mode 100644
index 82836d1dbf..0000000000
--- a/apps/podcast-fetch-episode/src/python/podcast_fetch_episode/fetch_and_store.py
+++ /dev/null
@@ -1,204 +0,0 @@
-import os
-import shutil
-import tempfile
-from typing import Optional
-
-from mediawords.db import DatabaseHandler
-from mediawords.util.identify_language import language_code_for_text, identification_would_be_reliable
-from mediawords.util.log import create_logger
-from mediawords.util.parse_html import html_strip
-
-from podcast_fetch_episode.bcp47_lang import iso_639_1_code_to_bcp_47_identifier
-from podcast_fetch_episode.config import PodcastFetchEpisodeConfig
-from podcast_fetch_episode.enclosure import podcast_viable_enclosure_for_story, MAX_ENCLOSURE_SIZE
-from podcast_fetch_episode.exceptions import (
-    McStoryNotFoundException,
-    McPodcastNoViableStoryEnclosuresException,
-    McPodcastEnclosureTooBigException,
-    McPodcastFileStoreFailureException,
-    McPodcastFileFetchFailureException,
-    McPodcastGCSStoreFailureException,
-    McPodcastPostgreSQLException,
-)
-from podcast_fetch_episode.fetch_url import fetch_big_file
-from podcast_fetch_episode.gcs_store import GCSStore
-from podcast_fetch_episode.media_file import TranscodeTempDirAndFile, transcode_media_file_if_needed, media_file_info
-
-log = create_logger(__name__)
-
-
-def _cleanup_temp_dir(temp: TranscodeTempDirAndFile) -> None:
-    """Clean up temporary directory or raise a hard exception."""
-    try:
-        shutil.rmtree(temp.temp_dir)
-    except Exception as ex:
-        # Not being able to clean up after ourselves is a "hard" error as we might run out of disk space that way
-        raise McPodcastFileStoreFailureException(f"Unable to remove temporary directory: {ex}")
-
-
-def fetch_and_store_episode(db: DatabaseHandler,
-                            stories_id: int,
-                            config: Optional[PodcastFetchEpisodeConfig] = None) -> None:
-    """
-    Choose a viable story enclosure for podcast, fetch it, transcode if needed, store to GCS, and record to DB.
-
-    1) Determines the episode's likely language by looking into its title and description, converts the language code to
-       BCP 47;
-    1) Using enclosures from "story_enclosures", chooses the one that looks like a podcast episode the most;
-    2) Fetches the chosen enclosure;
-    3) Transcodes the file (if needed) by:
-        a) converting it to an audio format that the Speech API can support, and / or
-        b) discarding video stream from the media file, and / or
-        c) discarding other audio streams from the media file;
-    5) Reads the various parameters, e.g. sample rate, of the episode audio file;
-    4) Uploads the episode audio file to Google Cloud Storage;
-    5) Adds a row to "podcast_episodes".
-
-    Adding a job to submit the newly created episode to Speech API (by adding a RabbitMQ job) is up to the caller.
-
-    :param db: Database handler.
-    :param stories_id: Story ID for the story to operate on.
-    :param config: (optional) Podcast fetcher configuration object (useful for testing).
-    """
-
-    if not config:
-        config = PodcastFetchEpisodeConfig()
-
-    story = db.find_by_id(table='stories', object_id=stories_id)
-    if not story:
-        raise McStoryNotFoundException(f"Story {stories_id} was not found.")
-
-    # Try to determine language of the story
-    story_title = story['title']
-    story_description = html_strip(story['description'])
-    sample_text = f"{story_title}\n{story_description}"
-
-    iso_639_1_language_code = None
-    if identification_would_be_reliable(text=sample_text):
-        iso_639_1_language_code = language_code_for_text(text=sample_text)
-
-    if not iso_639_1_language_code:
-        iso_639_1_language_code = 'en'
-
-    # Convert to BCP 47 identifier
-    bcp_47_language_code = iso_639_1_code_to_bcp_47_identifier(
-        iso_639_1_code=iso_639_1_language_code,
-        url_hint=story['url'],
-    )
-
-    # Find the enclosure that might work the best
-    best_enclosure = podcast_viable_enclosure_for_story(db=db, stories_id=stories_id)
-    if not best_enclosure:
-        raise McPodcastNoViableStoryEnclosuresException(f"There were no viable enclosures found for story {stories_id}")
-
-    if best_enclosure.length:
-        if best_enclosure.length > MAX_ENCLOSURE_SIZE:
-            raise McPodcastEnclosureTooBigException(f"Chosen enclosure {best_enclosure} is too big.")
-
-    try:
-        temp_dir = tempfile.mkdtemp('fetch_and_store')
-    except Exception as ex:
-        raise McPodcastFileStoreFailureException(f"Unable to create temporary directory: {ex}")
-
-    # Fetch enclosure
-    input_filename = 'input_file'
-    input_file_path = os.path.join(temp_dir, input_filename)
-    log.info(f"Fetching enclosure {best_enclosure} to {input_file_path}...")
-    fetch_big_file(url=best_enclosure.url, dest_file=input_file_path, max_size=MAX_ENCLOSURE_SIZE)
-    log.info(f"Done fetching enclosure {best_enclosure} to {input_file_path}")
-
-    if os.stat(input_file_path).st_size == 0:
-        # Might happen with misconfigured webservers
-        raise McPodcastFileFetchFailureException(f"Fetched file {input_file_path} is empty.")
-
-    # Transcode if needed
-    input_file_obj = TranscodeTempDirAndFile(temp_dir=temp_dir, filename=input_filename)
-    transcoded_file_obj = transcode_media_file_if_needed(input_media_file=input_file_obj)
-
-    # Unset the variable so that we don't accidentally use it later
-    del input_filename, temp_dir
-
-    if input_file_obj != transcoded_file_obj:
-        # Function did some transcoding and stored everything in yet another file
-
-        # Remove the input file
-        _cleanup_temp_dir(temp=input_file_obj)
-
-        # Consider the transcoded file the new input file
-        input_file_obj = transcoded_file_obj
-
-    # (Re)read the properties of either the original or the transcoded file
-    media_info = media_file_info(media_file_path=input_file_obj.temp_full_path)
-    best_audio_stream = media_info.best_supported_audio_stream()
-
-    # Store input file to GCS
-    try:
-        gcs = GCSStore(config=config)
-        gcs_uri = gcs.store_object(
-            local_file_path=input_file_obj.temp_full_path,
-            object_id=str(stories_id),
-            mime_type=best_audio_stream.audio_codec_class.mime_type(),
-        )
-
-    except Exception as ex:
-
-        log.error(f"Unable to store episode file '{input_file_obj.temp_full_path}' for story {stories_id}: {ex}")
-
-        # Clean up, then raise further
-        _cleanup_temp_dir(temp=input_file_obj)
-
-        raise ex
-
-    # Clean up the locally stored file as we don't need it anymore
-    _cleanup_temp_dir(temp=input_file_obj)
-
-    # Insert everything to the database
-    try:
-        db.query("""
-            INSERT INTO podcast_episodes (
-                stories_id,
-                story_enclosures_id,
-                gcs_uri,
-                duration,
-                codec,
-                sample_rate,
-                bcp47_language_code
-            ) VALUES (
-                %(stories_id)s,
-                %(story_enclosures_id)s,
-                %(gcs_uri)s,
-                %(duration)s,
-                %(codec)s,
-                %(sample_rate)s,
-                %(bcp47_language_code)s            
-            ) ON CONFLICT (stories_id) DO UPDATE SET
-                story_enclosures_id = %(story_enclosures_id)s,
-                gcs_uri = %(gcs_uri)s,
-                duration = %(duration)s,
-                codec = %(codec)s,
-                sample_rate = %(sample_rate)s,
-                bcp47_language_code = %(bcp47_language_code)s
-        """, {
-            'stories_id': stories_id,
-            'story_enclosures_id': best_enclosure.story_enclosures_id,
-            'gcs_uri': gcs_uri,
-            'duration': best_audio_stream.duration,
-            'codec': best_audio_stream.audio_codec_class.postgresql_enum_value(),
-            'sample_rate': best_audio_stream.sample_rate,
-            'bcp47_language_code': bcp_47_language_code,
-        })
-
-    except Exception as ex_db:
-
-        # Try to delete object on GCS first
-        try:
-            gcs.delete_object(object_id=str(stories_id))
-        except Exception as ex_gcs:
-            # We should be able to delete it as we've just uploaded it
-            raise McPodcastGCSStoreFailureException((
-                f"Unable to clean up story's {stories_id} audio file from GCS after database insert failure; "
-                f"database insert exception: {ex_db}; "
-                f"GCS exception: {ex_gcs}")
-            )
-
-        raise McPodcastPostgreSQLException(f"Failed inserting episode for story {stories_id}: {ex_db}")
diff --git a/apps/podcast-fetch-episode/src/python/podcast_fetch_episode/gcs_store.py b/apps/podcast-fetch-episode/src/python/podcast_fetch_episode/gcs_store.py
deleted file mode 100644
index 579ceb3afb..0000000000
--- a/apps/podcast-fetch-episode/src/python/podcast_fetch_episode/gcs_store.py
+++ /dev/null
@@ -1,179 +0,0 @@
-import os
-from typing import Optional
-
-# noinspection PyPackageRequirements
-from google.cloud import storage
-# noinspection PyPackageRequirements
-from google.cloud.exceptions import NotFound
-# noinspection PyPackageRequirements
-from google.cloud.storage import Blob, Bucket
-
-from mediawords.util.log import create_logger
-
-from podcast_fetch_episode.config import PodcastFetchEpisodeConfig
-from podcast_fetch_episode.exceptions import (
-    McPodcastGCSStoreFailureException,
-    McPodcastMisconfiguredGCSException,
-)
-
-log = create_logger(__name__)
-
-
-class GCSStore(object):
-    """Google Cloud Storage store."""
-
-    __slots__ = [
-        '__bucket_internal',
-        '__config',
-    ]
-
-    def __init__(self, config: Optional[PodcastFetchEpisodeConfig] = None):
-        if not config:
-            config = PodcastFetchEpisodeConfig()
-
-        self.__config = config
-        self.__bucket_internal = None
-
-    @property
-    def _bucket(self) -> Bucket:
-        """Lazy-loaded bucket."""
-        if not self.__bucket_internal:
-
-            try:
-                storage_client = storage.Client.from_service_account_json(self.__config.gc_auth_json_file())
-                self.__bucket_internal = storage_client.get_bucket(self.__config.gc_storage_bucket_name())
-            except Exception as ex:
-                raise McPodcastGCSStoreFailureException(
-                    f"Unable to get GCS bucket '{self.__config.gc_storage_bucket_name()}': {ex}"
-                )
-
-        return self.__bucket_internal
-
-    @classmethod
-    def _remote_path(cls, path_prefix: str, object_id: str):
-        if not object_id:
-            raise McPodcastMisconfiguredGCSException("Object ID is unset.")
-
-        path = os.path.join(path_prefix, object_id)
-
-        # GCS doesn't like double slashes...
-        path = os.path.normpath(path)
-
-        # ...nor is a fan of slashes at the start of path
-        while path.startswith('/'):
-            path = path[1:]
-
-        return path
-
-    def _blob_from_object_id(self, object_id: str) -> Blob:
-        if not object_id:
-            raise McPodcastMisconfiguredGCSException("Object ID is unset.")
-
-        remote_path = self._remote_path(path_prefix=self.__config.gc_storage_path_prefix(), object_id=object_id)
-        blob = self._bucket.blob(remote_path)
-        return blob
-
-    def object_exists(self, object_id: str) -> bool:
-        """
-        Test if object exists at remote location.
-
-        :param object_id: Object ID that should be tested.
-        :return: True if object already exists under a given object ID.
-        """
-
-        if not object_id:
-            raise McPodcastMisconfiguredGCSException("Object ID is unset.")
-
-        log.debug(f"Testing if object ID {object_id} exists...")
-
-        blob = self._blob_from_object_id(object_id=object_id)
-
-        log.debug(f"Testing blob for existence: {blob}")
-
-        try:
-            # blob.reload() returns metadata too
-            blob.reload()
-
-        except NotFound as ex:
-            log.debug(f"Object '{object_id}' was not found: {ex}")
-            exists = False
-
-        except Exception as ex:
-            raise McPodcastGCSStoreFailureException(f"Unable to test whether GCS object {object_id} exists: {ex}")
-
-        else:
-            exists = True
-
-        return exists
-
-    def store_object(self, local_file_path: str, object_id: str, mime_type: Optional[str] = None) -> str:
-        """
-        Store a local file to a remote location.
-
-        Will overwrite existing objects with a warning.
-
-        :param local_file_path: Local file that should be stored.
-        :param object_id: Object ID under which the object should be stored.
-        :param mime_type: MIME type which, if set, will be stored as "Content-Type".
-        :return: Full Google Cloud Storage URI of the object, e.g. "gs://<bucket_name>/<path>/<object_id>".
-        """
-
-        if not os.path.isfile(local_file_path):
-            raise McPodcastMisconfiguredGCSException(f"Local file '{local_file_path}' does not exist.")
-
-        if not object_id:
-            raise McPodcastMisconfiguredGCSException("Object ID is unset.")
-
-        log.debug(f"Storing file '{local_file_path}' as object ID {object_id}...")
-
-        if self.object_exists(object_id=object_id):
-            log.warning(f"Object {object_id} already exists, will overwrite.")
-
-        blob = self._blob_from_object_id(object_id=object_id)
-
-        blob.upload_from_filename(filename=local_file_path, content_type=mime_type)
-
-        return self.object_uri(object_id=object_id)
-
-    def delete_object(self, object_id: str) -> None:
-        """
-        Delete object from remote location.
-
-        Doesn't raise if object doesn't exist.
-
-        :param object_id: Object ID that should be deleted.
-        """
-
-        if not object_id:
-            raise McPodcastMisconfiguredGCSException("Object ID is unset.")
-
-        log.debug(f"Deleting object ID {object_id}...")
-
-        blob = self._blob_from_object_id(object_id=object_id)
-
-        try:
-            blob.delete()
-
-        except NotFound:
-            log.warning(f"Object {object_id} doesn't exist.")
-
-        except Exception as ex:
-            raise McPodcastGCSStoreFailureException(f"Unable to delete GCS object {object_id}: {ex}")
-
-    def object_uri(self, object_id: str) -> str:
-        """
-        Generate Google Cloud Storage URI for the object.
-
-        :param object_id: Object ID to return the URI for.
-        :return: Full Google Cloud Storage URI of the object, e.g. "gs://<bucket_name>/<path>/<object_id>".
-        """
-
-        if not object_id:
-            raise McPodcastMisconfiguredGCSException("Object ID is unset.")
-
-        uri = "gs://{host}/{remote_path}".format(
-            host=self.__config.gc_storage_bucket_name(),
-            remote_path=self._remote_path(path_prefix=self.__config.gc_storage_path_prefix(), object_id=object_id),
-        )
-
-        return uri
diff --git a/apps/podcast-fetch-episode/src/python/podcast_fetch_episode/media_file.py b/apps/podcast-fetch-episode/src/python/podcast_fetch_episode/media_file.py
deleted file mode 100644
index debd695878..0000000000
--- a/apps/podcast-fetch-episode/src/python/podcast_fetch_episode/media_file.py
+++ /dev/null
@@ -1,298 +0,0 @@
-import dataclasses
-import subprocess
-import math
-import os
-import shutil
-import tempfile
-from typing import Type, Optional, List
-
-import ffmpeg
-
-from mediawords.util.log import create_logger
-
-from podcast_fetch_episode.audio_codecs import (
-    AbstractAudioCodec,
-    Linear16AudioCodec,
-    FLACAudioCodec,
-    MULAWAudioCodec,
-    OggOpusAudioCodec,
-    MP3AudioCodec,
-)
-from podcast_fetch_episode.exceptions import (
-    McPodcastMisconfiguredTranscoderException,
-    McPodcastFileIsInvalidException,
-    McPodcastFileStoreFailureException,
-)
-
-log = create_logger(__name__)
-
-_SUPPORTED_CODEC_CLASSES = {
-    Linear16AudioCodec,
-    FLACAudioCodec,
-    MULAWAudioCodec,
-    OggOpusAudioCodec,
-    MP3AudioCodec,
-}
-"""Supported native audio codec classes."""
-
-
-@dataclasses.dataclass
-class MediaFileInfoAudioStream(object):
-    """Information about a single audio stream in a media file."""
-
-    ffmpeg_stream_index: int
-    """FFmpeg internal stream index."""
-
-    audio_codec_class: Optional[Type[AbstractAudioCodec]]
-    """Audio codec class if the stream is one of the supported types and has single (mono) channel, None otherwise."""
-
-    duration: int
-    """Duration (in seconds)."""
-
-    audio_channel_count: int
-    """Audio channel count."""
-
-    sample_rate: int
-    """Audio sample rate."""
-
-
-@dataclasses.dataclass
-class MediaFileInfo(object):
-    """Information about media file."""
-
-    audio_streams: List[MediaFileInfoAudioStream]
-    """List of audio streams found in the media file."""
-
-    has_video_streams: bool
-    """True if the media file has video streams."""
-
-    def best_supported_audio_stream(self) -> Optional[MediaFileInfoAudioStream]:
-        """Return the first supported audio stream, if any."""
-        for stream in self.audio_streams:
-            if stream.audio_codec_class:
-                return stream
-        return None
-
-
-def media_file_info(media_file_path: str) -> MediaFileInfo:
-    """
-    Read audio / video media file information, or raise if it can't be read.
-
-    :param media_file_path: Full path to media file.
-    :return: MediaFileInfo object.
-    """
-    if not os.path.isfile(media_file_path):
-        # Input file should exist at this point; it it doesn't, we have probably messed up something in the code
-        raise McPodcastMisconfiguredTranscoderException(f"Input file {media_file_path} does not exist.")
-
-    try:
-        file_info = ffmpeg.probe(media_file_path)
-        if not file_info:
-            raise Exception("Returned metadata is empty.")
-    except Exception as ex:
-        raise McPodcastFileIsInvalidException(
-            f"Unable to read metadata from file {media_file_path}: {ex}"
-        )
-
-    if 'streams' not in file_info:
-        # FFmpeg should come up with some sort of a stream in any case
-        raise McPodcastMisconfiguredTranscoderException("Returned probe doesn't have 'streams' key.")
-
-    # Test if one of the audio streams is of one of the supported codecs
-    audio_streams = []
-    has_video_streams = False
-    for stream in file_info['streams']:
-        if stream['codec_type'] == 'audio':
-
-            try:
-                audio_channel_count = int(stream['channels'])
-                if audio_channel_count == 0:
-                    raise Exception("Audio channel count is 0")
-            except Exception as ex:
-                log.warning(f"Unable to read audio channel count from stream {stream}: {ex}")
-                # Just skip this stream if we can't figure it out
-                continue
-
-            audio_codec_class = None
-
-            # We'll need to transcode audio files with more than one channel count anyway
-            if audio_channel_count == 1:
-                for codec in _SUPPORTED_CODEC_CLASSES:
-                    if codec.ffmpeg_stream_is_this_codec(ffmpeg_stream=stream):
-                        audio_codec_class = codec
-                        break
-
-            try:
-
-                if 'duration' in stream:
-                    # 'duration': '3.766621'
-                    duration = math.floor(float(stream['duration']))
-
-                elif 'DURATION' in stream.get('tags', {}):
-                    # 'DURATION': '00:00:03.824000000'
-                    duration_parts = stream['tags']['DURATION'].split(':')
-                    if len(duration_parts) != 3:
-                        raise McPodcastFileIsInvalidException(f"Unable to parse 'DURATION': {duration_parts}")
-
-                    hh = int(duration_parts[0])
-                    mm = int(duration_parts[1])
-                    ss_ms = duration_parts[2].split('.')
-
-                    if len(ss_ms) == 1:
-                        ss = int(ss_ms[0])
-                        ms = 0
-                    elif len(ss_ms) == 2:
-                        ss = int(ss_ms[0])
-                        ms = int(ss_ms[1])
-                    else:
-                        raise McPodcastFileIsInvalidException(f"Unable to parse 'DURATION': {duration_parts}")
-
-                    duration = hh * 3600 + mm * 60 + ss + (1 if ms > 0 else 0)
-
-                else:
-                    raise McPodcastFileIsInvalidException(f"Stream doesn't have duration: {stream}")
-
-                audio_stream = MediaFileInfoAudioStream(
-                    ffmpeg_stream_index=stream['index'],
-                    audio_codec_class=audio_codec_class,
-                    duration=duration,
-                    audio_channel_count=audio_channel_count,
-                    sample_rate=int(stream['sample_rate']),
-                )
-                audio_streams.append(audio_stream)
-
-            except Exception as ex:
-                # Just skip this stream if we can't figure it out
-                log.warning(f"Unable to read audio stream data for stream {stream}: {ex}")
-
-        elif stream['codec_type'] == 'video':
-            has_video_streams = True
-
-    return MediaFileInfo(
-        audio_streams=audio_streams,
-        has_video_streams=has_video_streams,
-    )
-
-
-@dataclasses.dataclass
-class TranscodeTempDirAndFile(object):
-    """
-    Temporary directory and filename for transcoding.
-
-    It is assumed that caller is free to recursively remove 'temp_directory' after making use of the transcoded file.
-    """
-    temp_dir: str
-    filename: str
-
-    @property
-    def temp_full_path(self) -> str:
-        """Return full path to file."""
-        return os.path.join(self.temp_dir, self.filename)
-
-
-def transcode_media_file_if_needed(input_media_file: TranscodeTempDirAndFile) -> TranscodeTempDirAndFile:
-    """
-    Transcode file (if needed) to something that Speech API will support.
-
-    * If input has a video stream, it will be discarded;
-    * If input has more than one audio stream, others will be discarded leaving only one (preferably the one that Speech
-      API can support);
-    * If input doesn't have an audio stream in Speech API-supported codec, it will be transcoded to lossless
-      FLAC 16 bit in order to preserve quality;
-    * If the chosen audio stream has multiple channels (e.g. stereo or 5.1), it will be mixed into a single (mono)
-      channel as Speech API supports multi-channel recognition only when different voices speak into each of the
-      channels.
-
-    :param input_media_file: Temporary directory and input media file to consider transcoding.
-    :return: Either the same 'input_media_file' if file wasn't transcoded, or new TranscodeTempDirAndFile() if it was.
-    """
-
-    if not os.path.isdir(input_media_file.temp_dir):
-        # Directory should exist; if it doesn't, it's a critical problem either in the filesystem or the code
-        raise McPodcastMisconfiguredTranscoderException(f"Directory '{input_media_file.temp_dir}' does not exist.")
-
-    if not os.path.isfile(input_media_file.temp_full_path):
-        raise McPodcastMisconfiguredTranscoderException(f"File '{input_media_file}' does not exist.")
-
-    # Independently from what <enclosure /> has told us, identify the file type again ourselves
-    media_info = media_file_info(media_file_path=input_media_file.temp_full_path)
-
-    if not media_info.audio_streams:
-        raise McPodcastFileIsInvalidException("Downloaded file doesn't appear to have any audio streams.")
-
-    ffmpeg_args = []
-
-    supported_audio_stream = media_info.best_supported_audio_stream()
-    if supported_audio_stream:
-        log.info(f"Found a supported audio stream")
-
-        # Test if there is more than one audio stream
-        if len(media_info.audio_streams) > 1:
-            log.info(f"Found other audio streams besides the supported one, will discard those")
-
-            ffmpeg_args.extend(['-f', supported_audio_stream.audio_codec_class.ffmpeg_container_format()])
-
-            # Select all audio streams
-            ffmpeg_args.extend(['-map', '0:a'])
-
-            for stream in media_info.audio_streams:
-                # Deselect the unsupported streams
-                if stream != supported_audio_stream:
-                    ffmpeg_args.extend(['-map', f'-0:a:{stream.ffmpeg_stream_index}'])
-
-    # If a stream of a supported codec was not found, transcode it to FLAC 16 bit in order to not lose any quality
-    else:
-        log.info(f"None of the audio streams are supported by the Speech API, will transcode to FLAC")
-
-        # Map first audio stream to input 0
-        ffmpeg_args.extend(['-map', '0:a:0'])
-
-        # Transcode to FLAC (16 bit) in order to not lose any quality
-        ffmpeg_args.extend(['-acodec', 'flac'])
-        ffmpeg_args.extend(['-f', 'flac'])
-        ffmpeg_args.extend(['-sample_fmt', 's16'])
-
-        # Ensure that we end up with mono audio
-        ffmpeg_args.extend(['-ac', '1'])
-
-    # If there's video in the file (e.g. video), remove it
-    if media_info.has_video_streams:
-        # Discard all video streams
-        ffmpeg_args.extend(['-map', '-0:v'])
-
-    if ffmpeg_args:
-
-        temp_filename = 'transcoded_file'
-
-        try:
-            temp_dir = tempfile.mkdtemp('media_file')
-        except Exception as ex:
-            raise McPodcastFileStoreFailureException(f"Unable to create temporary directory: {ex}")
-
-        temp_file_path = os.path.join(temp_dir, temp_filename)
-
-        try:
-            log.info(f"Transcoding {input_media_file.temp_full_path} to {temp_file_path}...")
-
-            # I wasn't sure how to map outputs in "ffmpeg-python" library so here we call ffmpeg directly
-            ffmpeg_command = ['ffmpeg', '-nostdin', '-hide_banner',
-                              '-i', input_media_file.temp_full_path] + ffmpeg_args + [temp_file_path]
-            log.debug(f"FFmpeg command: {ffmpeg_command}")
-            subprocess.check_call(ffmpeg_command)
-
-            log.info(f"Done transcoding {input_media_file.temp_full_path} to {temp_file_path}")
-
-        except Exception as ex:
-
-            shutil.rmtree(temp_dir)
-
-            raise McPodcastFileIsInvalidException(f"Unable to transcode {input_media_file.temp_full_path}: {ex}")
-
-        result_media_file = TranscodeTempDirAndFile(temp_dir=temp_dir, filename=temp_filename)
-
-    else:
-
-        # Return the same file as it wasn't touched
-        result_media_file = input_media_file
-
-    return result_media_file
diff --git a/apps/podcast-fetch-episode/src/requirements.txt b/apps/podcast-fetch-episode/src/requirements.txt
deleted file mode 100644
index 061d400634..0000000000
--- a/apps/podcast-fetch-episode/src/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-ffmpeg-python==0.2.0
-google-cloud-storage==1.35.0
diff --git a/apps/podcast-fetch-episode/tests/python/config_random_gcs_prefix.py b/apps/podcast-fetch-episode/tests/python/config_random_gcs_prefix.py
deleted file mode 100644
index f01de8e910..0000000000
--- a/apps/podcast-fetch-episode/tests/python/config_random_gcs_prefix.py
+++ /dev/null
@@ -1,16 +0,0 @@
-import datetime
-
-from podcast_fetch_episode.config import PodcastFetchEpisodeConfig
-
-
-class RandomPathPrefixConfig(PodcastFetchEpisodeConfig):
-    """Configuration which stores GCS objects under a timestamped prefix."""
-    _RANDOM_PREFIX = None
-
-    @staticmethod
-    def gc_storage_path_prefix() -> str:
-        if not RandomPathPrefixConfig._RANDOM_PREFIX:
-            date = datetime.datetime.utcnow().isoformat()
-            date = date.replace(':', '_')
-            RandomPathPrefixConfig._RANDOM_PREFIX = f'tests-{date}'
-        return RandomPathPrefixConfig._RANDOM_PREFIX
diff --git a/apps/podcast-fetch-episode/tests/python/test_fetch_and_store.py b/apps/podcast-fetch-episode/tests/python/test_fetch_and_store.py
deleted file mode 100644
index 326bdd4d1c..0000000000
--- a/apps/podcast-fetch-episode/tests/python/test_fetch_and_store.py
+++ /dev/null
@@ -1,79 +0,0 @@
-import os
-from typing import Union
-
-from mediawords.db import connect_to_db
-from mediawords.test.db.create import create_test_medium, create_test_feed, create_test_story
-from mediawords.test.hash_server import HashServer
-from mediawords.util.network import random_unused_port
-
-from podcast_fetch_episode.fetch_and_store import fetch_and_store_episode
-from podcast_fetch_episode.gcs_store import GCSStore
-
-from .config_random_gcs_prefix import RandomPathPrefixConfig
-
-TEST_MP3_PATH = '/opt/mediacloud/tests/data/media-samples/samples/kim_kardashian-mp3-mono.mp3'
-assert os.path.isfile(TEST_MP3_PATH), f"Test MP3 file '{TEST_MP3_PATH}' should exist."
-
-
-def test_fetch_and_store_episode():
-    db = connect_to_db()
-
-    test_medium = create_test_medium(db=db, label='test')
-    test_feed = create_test_feed(db=db, label='test', medium=test_medium)
-
-    # 'label' is important as it will be stored in both stories.title and stories.description, which in turn will be
-    # used to guess the probable language of the podcast episode
-    test_story = create_test_story(db=db, label='keeping up with Kardashians', feed=test_feed)
-
-    stories_id = test_story['stories_id']
-
-    with open(TEST_MP3_PATH, mode='rb') as f:
-        test_mp3_data = f.read()
-
-    # noinspection PyUnusedLocal
-    def __mp3_callback(request: HashServer.Request) -> Union[str, bytes]:
-        response = "".encode('utf-8')
-        response += "HTTP/1.0 200 OK\r\n".encode('utf-8')
-        response += "Content-Type: audio/mpeg\r\n".encode('utf-8')
-        response += f"Content-Length: {len(test_mp3_data)}\r\n".encode('utf-8')
-        response += "\r\n".encode('utf-8')
-        response += test_mp3_data
-        return response
-
-    port = random_unused_port()
-    pages = {
-        '/test.mp3': {
-            'callback': __mp3_callback,
-        }
-    }
-
-    hs = HashServer(port=port, pages=pages)
-    hs.start()
-
-    mp3_url = f'http://127.0.0.1:{port}/test.mp3'
-
-    story_enclosure = db.insert(table='story_enclosures', insert_hash={
-        'stories_id': stories_id,
-        'url': mp3_url,
-        'mime_type': 'audio/mpeg',
-        'length': len(test_mp3_data),
-    })
-
-    conf = RandomPathPrefixConfig()
-    fetch_and_store_episode(db=db, stories_id=stories_id, config=conf)
-
-    episodes = db.select(table='podcast_episodes', what_to_select='*').hashes()
-    assert len(episodes), f"Only one episode is expected."
-
-    episode = episodes[0]
-    assert episode['stories_id'] == stories_id
-    assert episode['story_enclosures_id'] == story_enclosure['story_enclosures_id']
-    assert episode['gcs_uri'] == f"gs://{conf.gc_storage_bucket_name()}/{conf.gc_storage_path_prefix()}/{stories_id}"
-    assert episode['duration'] > 0
-    assert episode['codec'] == 'MP3'
-    assert episode['sample_rate'] == 44100
-    assert episode['bcp47_language_code'] == 'en-US'
-
-    # Try removing test object
-    gcs = GCSStore(config=conf)
-    gcs.delete_object(object_id=str(stories_id))
diff --git a/apps/podcast-fetch-transcript/.idea/externalDependencies.xml b/apps/podcast-fetch-transcript/.idea/externalDependencies.xml
deleted file mode 100644
index 7872ffbcf2..0000000000
--- a/apps/podcast-fetch-transcript/.idea/externalDependencies.xml
+++ /dev/null
@@ -1,6 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="ExternalDependencies">
-    <plugin id="Docker" />
-  </component>
-</project>
\ No newline at end of file
diff --git a/apps/podcast-fetch-transcript/.idea/misc.xml b/apps/podcast-fetch-transcript/.idea/misc.xml
deleted file mode 100644
index b31733e855..0000000000
--- a/apps/podcast-fetch-transcript/.idea/misc.xml
+++ /dev/null
@@ -1,7 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="JavaScriptSettings">
-    <option name="languageLevel" value="ES6" />
-  </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.8.5 Docker Compose (podcast-fetch-transcript at [/home/pypt/m/apps/podcast-fetch-transcript/docker-compose.tests.yml])" project-jdk-type="Python SDK" />
-</project>
\ No newline at end of file
diff --git a/apps/podcast-fetch-transcript/.idea/modules.xml b/apps/podcast-fetch-transcript/.idea/modules.xml
deleted file mode 100644
index 4ff9c4812f..0000000000
--- a/apps/podcast-fetch-transcript/.idea/modules.xml
+++ /dev/null
@@ -1,8 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="ProjectModuleManager">
-    <modules>
-      <module fileurl="file://$PROJECT_DIR$/.idea/podcast-fetch-transcript.iml" filepath="$PROJECT_DIR$/.idea/podcast-fetch-transcript.iml" />
-    </modules>
-  </component>
-</project>
\ No newline at end of file
diff --git a/apps/podcast-fetch-transcript/.idea/runConfigurations/Dockerfile.xml b/apps/podcast-fetch-transcript/.idea/runConfigurations/Dockerfile.xml
deleted file mode 100644
index 3f86f834cf..0000000000
--- a/apps/podcast-fetch-transcript/.idea/runConfigurations/Dockerfile.xml
+++ /dev/null
@@ -1,17 +0,0 @@
-<component name="ProjectRunConfigurationManager">
-  <configuration default="false" name="Dockerfile" type="docker-deploy" factoryName="dockerfile" server-name="mediacloud">
-    <deployment type="dockerfile">
-      <settings>
-        <option name="buildCliOptions" value="--cache-from gcr.io/mcback/podcast-fetch-transcript:latest" />
-        <option name="buildOnly" value="true" />
-        <option name="command" value="" />
-        <option name="containerName" value="" />
-        <option name="entrypoint" value="" />
-        <option name="imageTag" value="gcr.io/mcback/podcast-fetch-transcript:latest" />
-        <option name="commandLineOptions" value="" />
-        <option name="sourceFilePath" value="Dockerfile" />
-      </settings>
-    </deployment>
-    <method v="2" />
-  </configuration>
-</component>
\ No newline at end of file
diff --git a/apps/podcast-fetch-transcript/.idea/sqldialects.xml b/apps/podcast-fetch-transcript/.idea/sqldialects.xml
deleted file mode 100644
index 790b3f37f8..0000000000
--- a/apps/podcast-fetch-transcript/.idea/sqldialects.xml
+++ /dev/null
@@ -1,7 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="SqlDialectMappings">
-    <file url="file://$PROJECT_DIR$/../postgresql-server/schema/mediawords.sql" dialect="PostgreSQL" />
-    <file url="PROJECT" dialect="PostgreSQL" />
-  </component>
-</project>
\ No newline at end of file
diff --git a/apps/podcast-fetch-transcript/.idea/vcs.xml b/apps/podcast-fetch-transcript/.idea/vcs.xml
deleted file mode 100644
index a4647a1c0e..0000000000
--- a/apps/podcast-fetch-transcript/.idea/vcs.xml
+++ /dev/null
@@ -1,7 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="VcsDirectoryMappings">
-    <mapping directory="$PROJECT_DIR$/../.." vcs="Git" />
-    <mapping directory="$PROJECT_DIR$/tests/data/media-samples" vcs="Git" />
-  </component>
-</project>
\ No newline at end of file
diff --git a/apps/podcast-fetch-transcript/Dockerfile b/apps/podcast-fetch-transcript/Dockerfile
deleted file mode 100644
index 0a7acb7f8f..0000000000
--- a/apps/podcast-fetch-transcript/Dockerfile
+++ /dev/null
@@ -1,26 +0,0 @@
-#
-# Collect due transcripts from Google Speech API, store them locally as both raw JSON and download text
-#
-
-FROM gcr.io/mcback/common:latest
-
-# Install Python dependencies
-COPY src/requirements.txt /var/tmp/
-RUN \
-    cd /var/tmp/ && \
-    pip3 install -r requirements.txt && \
-    rm requirements.txt && \
-    rm -rf /root/.cache/ && \
-    true
-
-# Copy sources
-COPY src/ /opt/mediacloud/src/podcast-fetch-transcript/
-ENV PERL5LIB="/opt/mediacloud/src/podcast-fetch-transcript/perl:${PERL5LIB}" \
-    PYTHONPATH="/opt/mediacloud/src/podcast-fetch-transcript/python:${PYTHONPATH}"
-
-# Copy worker script
-COPY bin /opt/mediacloud/bin
-
-USER mediacloud
-
-CMD ["podcast_fetch_transcript_worker.py"]
diff --git a/apps/podcast-fetch-transcript/bin/podcast_fetch_transcript_worker.py b/apps/podcast-fetch-transcript/bin/podcast_fetch_transcript_worker.py
deleted file mode 100755
index ae25385834..0000000000
--- a/apps/podcast-fetch-transcript/bin/podcast_fetch_transcript_worker.py
+++ /dev/null
@@ -1,57 +0,0 @@
-#!/usr/bin/env python3
-
-from mediawords.db import connect_to_db
-from mediawords.job import JobBroker
-from mediawords.util.log import create_logger
-from mediawords.util.perl import decode_object_from_bytes_if_needed
-from mediawords.util.process import fatal_error
-
-from podcast_fetch_transcript.exceptions import McPodcastFetchTranscriptSoftException
-
-from podcast_fetch_transcript.fetch_store import fetch_store_transcript
-
-log = create_logger(__name__)
-
-
-def run_podcast_fetch_transcript(podcast_episode_transcript_fetches_id: int) -> None:
-    """Fetch a completed episode transcripts from Speech API for story."""
-
-    if isinstance(podcast_episode_transcript_fetches_id, bytes):
-        podcast_episode_transcript_fetches_id = decode_object_from_bytes_if_needed(
-            podcast_episode_transcript_fetches_id)
-    podcast_episode_transcript_fetches_id = int(podcast_episode_transcript_fetches_id)
-
-    if not podcast_episode_transcript_fetches_id:
-        fatal_error("'podcast_episode_transcript_fetches_id' is unset.")
-
-    db = connect_to_db()
-
-    log.info(f"Fetching transcript for fetch ID {podcast_episode_transcript_fetches_id}...")
-
-    try:
-        stories_id = fetch_store_transcript(
-            db=db,
-            podcast_episode_transcript_fetches_id=podcast_episode_transcript_fetches_id,
-        )
-
-        if stories_id:
-            JobBroker(queue_name='MediaWords::Job::ExtractAndVector').add_to_queue(stories_id=stories_id)
-
-    except McPodcastFetchTranscriptSoftException as ex:
-        # Soft exceptions
-        log.error(f"Unable to fetch transcript for fetch ID {podcast_episode_transcript_fetches_id}: {ex}")
-        raise ex
-
-    except Exception as ex:
-        # Hard and other exceptions
-        fatal_error((
-            f"Fatal / unknown error while fetching transcript "
-            f"for ID {podcast_episode_transcript_fetches_id}: {ex}"
-        ))
-
-    log.info(f"Done fetching transcript for ID {podcast_episode_transcript_fetches_id}")
-
-
-if __name__ == '__main__':
-    app = JobBroker(queue_name='MediaWords::Job::Podcast::FetchTranscript')
-    app.start_worker(handler=run_podcast_fetch_transcript)
diff --git a/apps/podcast-fetch-transcript/docker-compose.tests.yml b/apps/podcast-fetch-transcript/docker-compose.tests.yml
deleted file mode 100644
index ea93f92b0a..0000000000
--- a/apps/podcast-fetch-transcript/docker-compose.tests.yml
+++ /dev/null
@@ -1,117 +0,0 @@
-version: "3.7"
-
-services:
-
-    podcast-fetch-transcript:
-        image: gcr.io/mcback/podcast-fetch-transcript:latest
-        init: true
-        stop_signal: SIGKILL
-        environment:
-            MC_PODCAST_GC_AUTH_JSON_BASE64: "${MC_PODCAST_GC_AUTH_JSON_BASE64}"
-            MC_PODCAST_FETCH_TRANSCRIPT_RUN_COSTLY_TEST: "${MC_PODCAST_FETCH_TRANSCRIPT_RUN_COSTLY_TEST}"
-        expose:
-            # "test_full_chain.py" test server's port
-            - 8080
-        volumes:
-            - type: bind
-              source: ./bin/
-              target: /opt/mediacloud/bin/
-            - type: bind
-              source: ./src/
-              target: /opt/mediacloud/src/podcast-fetch-transcript/
-            - type: bind
-              source: ./tests/
-              target: /opt/mediacloud/tests/
-            - type: bind
-              source: ./../common/src/
-              target: /opt/mediacloud/src/common/
-        depends_on:
-            - podcast-fetch-episode
-            - podcast-submit-operation
-            # No "podcast-poll-due-operations" as we'll just go ahead and fetch it ourselves
-            - postgresql-pgbouncer
-            - rabbitmq-server
-
-    podcast-fetch-episode:
-        image: gcr.io/mcback/podcast-fetch-episode:latest
-        init: true
-        stop_signal: SIGKILL
-        environment:
-            MC_PODCAST_GC_AUTH_JSON_BASE64: "${MC_PODCAST_GC_AUTH_JSON_BASE64}"
-            MC_PODCAST_FETCH_EPISODE_BUCKET_NAME: "${MC_PODCAST_FETCH_EPISODE_BUCKET_NAME}"
-            MC_PODCAST_FETCH_EPISODE_PATH_PREFIX: "audio-files/"
-        volumes:
-            - type: bind
-              source: ./../podcast-fetch-episode/bin/
-              target: /opt/mediacloud/bin/
-            - type: bind
-              source: ./../podcast-fetch-episode/src/
-              target: /opt/mediacloud/src/podcast-fetch-episode/
-            - type: bind
-              source: ./../common/src/
-              target: /opt/mediacloud/src/common/
-        depends_on:
-            - postgresql-pgbouncer
-            - rabbitmq-server
-
-    podcast-submit-operation:
-        image: gcr.io/mcback/podcast-submit-operation:latest
-        init: true
-        stop_signal: SIGKILL
-        environment:
-            MC_PODCAST_GC_AUTH_JSON_BASE64: "${MC_PODCAST_GC_AUTH_JSON_BASE64}"
-        volumes:
-            - type: bind
-              source: ./../podcast-submit-operation/bin/
-              target: /opt/mediacloud/bin/
-            - type: bind
-              source: ./../podcast-submit-operation/src/
-              target: /opt/mediacloud/src/podcast-submit-operation/
-            - type: bind
-              source: ./../common/src/
-              target: /opt/mediacloud/src/common/
-        depends_on:
-            - postgresql-pgbouncer
-            - rabbitmq-server
-
-    postgresql-pgbouncer:
-        image: gcr.io/mcback/postgresql-pgbouncer:latest
-        init: true
-        stop_signal: SIGKILL
-        expose:
-            - 6432
-        volumes:
-            - type: bind
-              source: ./../postgresql-pgbouncer/conf/
-              target: /etc/pgbouncer/
-        depends_on:
-            - postgresql-server
-
-    postgresql-server:
-        image: gcr.io/mcback/postgresql-server:latest
-        init: true
-        stop_signal: SIGKILL
-        expose:
-            - 5432
-        volumes:
-            - type: bind
-              source: ./../postgresql-server/bin/
-              target: /opt/mediacloud/bin/
-            - type: bind
-              source: ./../postgresql-server/schema/
-              target: /opt/mediacloud/schema/
-            - type: bind
-              source: ./../postgresql-server/conf/
-              target: /etc/postgresql/11/main/
-
-    rabbitmq-server:
-        image: gcr.io/mcback/rabbitmq-server:latest
-        init: true
-        stop_signal: SIGKILL
-        expose:
-            - 5672
-            - 15672
-        volumes:
-            - type: bind
-              source: ./../rabbitmq-server/conf/
-              target: /etc/rabbitmq/
diff --git a/apps/podcast-fetch-transcript/src/python/podcast_fetch_transcript/config.py b/apps/podcast-fetch-transcript/src/python/podcast_fetch_transcript/config.py
deleted file mode 100644
index 782ed619c3..0000000000
--- a/apps/podcast-fetch-transcript/src/python/podcast_fetch_transcript/config.py
+++ /dev/null
@@ -1,12 +0,0 @@
-from mediawords.util.config import file_with_env_value
-
-
-class PodcastFetchTranscriptConfig(object):
-    """
-    Podcast transcript fetcher configuration.
-    """
-
-    @staticmethod
-    def gc_auth_json_file() -> str:
-        """Return path to Google Cloud authentication JSON file."""
-        return file_with_env_value(name='MC_PODCAST_GC_AUTH_JSON_BASE64', encoded_with_base64=True)
diff --git a/apps/podcast-fetch-transcript/src/python/podcast_fetch_transcript/exceptions.py b/apps/podcast-fetch-transcript/src/python/podcast_fetch_transcript/exceptions.py
deleted file mode 100644
index 0b64b540b6..0000000000
--- a/apps/podcast-fetch-transcript/src/python/podcast_fetch_transcript/exceptions.py
+++ /dev/null
@@ -1,52 +0,0 @@
-import abc
-
-
-class _AbstractMcPodcastFetchTranscriptException(Exception, metaclass=abc.ABCMeta):
-    """Abstract exception."""
-    pass
-
-
-# ---
-
-
-class McPodcastFetchTranscriptSoftException(_AbstractMcPodcastFetchTranscriptException):
-    """Soft errors exception."""
-    pass
-
-
-class McOperationNotFoundException(McPodcastFetchTranscriptSoftException):
-    """Exception thrown when a transcription operation was not found for a particular operation ID."""
-    # Not a "hard" failure as sometimes these operations expire
-    pass
-
-
-# ---
-
-class McPodcastFetchTranscriptHardException(_AbstractMcPodcastFetchTranscriptException):
-    """Hard errors exception."""
-    pass
-
-
-class McDatabaseNotFoundException(McPodcastFetchTranscriptHardException):
-    """Exception thrown when we can't find something in the database that we've expected to find."""
-    pass
-
-
-class McDatabaseErrorException(McPodcastFetchTranscriptHardException):
-    """Exception thrown when a database raises an error."""
-    pass
-
-
-class McMisconfiguredSpeechAPIException(McPodcastFetchTranscriptHardException):
-    """Exception thrown when we receive something we didn't expect from Speech API."""
-    pass
-
-
-class McTranscriptionReturnedErrorException(McPodcastFetchTranscriptHardException):
-    """
-    Exception thrown when Speech API explicitly returns an error state.
-
-    When Speech API returns with an error, it's unclear whether it was us who have messed up or
-    something is (temporarily) wrong on their end, so on the safe side we throw a "hard" exception.
-    """
-    pass
diff --git a/apps/podcast-fetch-transcript/src/python/podcast_fetch_transcript/fetch_store.py b/apps/podcast-fetch-transcript/src/python/podcast_fetch_transcript/fetch_store.py
deleted file mode 100644
index 72db28a80b..0000000000
--- a/apps/podcast-fetch-transcript/src/python/podcast_fetch_transcript/fetch_store.py
+++ /dev/null
@@ -1,118 +0,0 @@
-from typing import Optional
-
-from mediawords.db import DatabaseHandler
-from mediawords.util.log import create_logger
-
-from podcast_fetch_transcript.exceptions import (
-    McDatabaseErrorException,
-    McDatabaseNotFoundException,
-)
-from podcast_fetch_transcript.handler import AbstractHandler, DefaultHandler
-
-log = create_logger(__name__)
-
-NOT_READY_RETRY_INTERVAL = 60 * 10
-"""If the transcript is not ready yet, how many seconds to wait until retrying the fetch."""
-
-
-def fetch_store_transcript(
-        db: DatabaseHandler,
-        podcast_episode_transcript_fetches_id: int,
-        handler: Optional[AbstractHandler] = None,
-) -> Optional[int]:
-    """
-    Try fetching and storing the transcript and update "podcast_episode_transcript_fetches" depending on how well it
-    went.
-
-    :param db: Database handler.
-    :param podcast_episode_transcript_fetches_id: Transcript fetch ID.
-    :param handler: Object of a AbstractHandler subclass which implements fetching and storing (useful for testing).
-    :return: Story ID if transcript was fetched and stored, None otherwise.
-    """
-
-    if not handler:
-        handler = DefaultHandler()
-
-    transcript_fetch = db.query("""
-        UPDATE podcast_episode_transcript_fetches
-        SET fetched_at = NOW()
-        WHERE podcast_episode_transcript_fetches_id = %(podcast_episode_transcript_fetches_id)s
-        RETURNING *
-    """, {
-        'podcast_episode_transcript_fetches_id': podcast_episode_transcript_fetches_id,
-    }).hash()
-    if not transcript_fetch:
-        raise McDatabaseNotFoundException(
-            f"Transcript fetch for ID {podcast_episode_transcript_fetches_id} was not found."
-        )
-
-    try:
-
-        transcript = handler.fetch_transcript(
-            db=db,
-            podcast_episode_transcript_fetches_id=podcast_episode_transcript_fetches_id,
-        )
-
-        if transcript:
-            log.info(f"Transcript fetched, storing...")
-
-            handler.store_transcript(db=db, transcript=transcript)
-
-            db.query("""
-                UPDATE podcast_episode_transcript_fetches
-                SET result = 'success'
-                WHERE podcast_episode_transcript_fetches_id = %(podcast_episode_transcript_fetches_id)s
-            """, {
-                'podcast_episode_transcript_fetches_id': podcast_episode_transcript_fetches_id,
-            })
-
-        else:
-            log.info(f"Transcript is not done yet, will retry in {NOT_READY_RETRY_INTERVAL} seconds...")
-
-            db.query("""
-                INSERT INTO podcast_episode_transcript_fetches (
-                    podcast_episodes_id,
-                    add_to_queue_at
-                ) VALUES (
-                    %(podcast_episodes_id)s,
-                    NOW() + INTERVAL %(add_to_queue_interval)s
-                )
-            """, {
-                'podcast_episodes_id': transcript_fetch['podcast_episodes_id'],
-                'add_to_queue_interval': f"{NOT_READY_RETRY_INTERVAL} seconds",
-            })
-
-            db.query("""
-                UPDATE podcast_episode_transcript_fetches
-                SET result = 'in_progress'
-                WHERE podcast_episode_transcript_fetches_id = %(podcast_episode_transcript_fetches_id)s
-            """, {
-                'podcast_episode_transcript_fetches_id': podcast_episode_transcript_fetches_id,
-            })
-
-    except Exception as ex:
-
-        # Try logging exception to the database
-        try:
-            db.query("""
-                UPDATE podcast_episode_transcript_fetches
-                SET
-                    result = 'error',
-                    error_message = %(error_message)s
-                WHERE podcast_episode_transcript_fetches_id = %(podcast_episode_transcript_fetches_id)s
-            """, {
-                'podcast_episode_transcript_fetches_id': podcast_episode_transcript_fetches_id,
-                'error_message': str(ex),
-            })
-        except Exception as ex2:
-            raise McDatabaseErrorException((
-                f"Error while executing transcript fetch for ID {podcast_episode_transcript_fetches_id}: {ex}; "
-                f"further, I wasn't able to log it to database because: {ex2}"
-            ))
-
-        raise ex
-
-    if transcript:
-        return transcript.stories_id
-    else:
-        return None
diff --git a/apps/podcast-fetch-transcript/src/python/podcast_fetch_transcript/handler.py b/apps/podcast-fetch-transcript/src/python/podcast_fetch_transcript/handler.py
deleted file mode 100644
index 8951f7c9fe..0000000000
--- a/apps/podcast-fetch-transcript/src/python/podcast_fetch_transcript/handler.py
+++ /dev/null
@@ -1,203 +0,0 @@
-import abc
-from typing import Optional
-
-# noinspection PyPackageRequirements
-from google.api_core.exceptions import InvalidArgument, NotFound, GoogleAPICallError
-# noinspection PyPackageRequirements
-from google.api_core.operation import from_gapic, Operation
-# noinspection PyPackageRequirements
-from google.api_core.operations_v1 import OperationsClient
-# noinspection PyPackageRequirements
-from google.cloud.speech_v1p1beta1 import SpeechClient, LongRunningRecognizeResponse, LongRunningRecognizeMetadata
-
-from mediawords.db import DatabaseHandler
-from mediawords.dbi.downloads import create_download_for_new_story
-from mediawords.dbi.downloads.store import store_content
-from mediawords.util.log import create_logger
-
-from podcast_fetch_transcript.config import PodcastFetchTranscriptConfig
-from podcast_fetch_transcript.exceptions import (
-    McDatabaseNotFoundException,
-    McMisconfiguredSpeechAPIException,
-    McOperationNotFoundException,
-    McTranscriptionReturnedErrorException,
-)
-from podcast_fetch_transcript.transcript import UtteranceAlternative, Utterance, Transcript
-
-log = create_logger(__name__)
-
-
-class AbstractHandler(object, metaclass=abc.ABCMeta):
-    """
-    Abstract class that fetches and stores a transcript.
-
-    Useful for testing as we can create a mock class which pretends to do it.
-    """
-
-    @classmethod
-    @abc.abstractmethod
-    def fetch_transcript(cls, db: DatabaseHandler, podcast_episode_transcript_fetches_id: int) -> Optional[Transcript]:
-        """
-        Attempt fetching a Speech API transcript for a given operation ID.
-
-        :param db: Database handler.
-        :param podcast_episode_transcript_fetches_id: Transcript fetch attempt ID.
-        :return: None if transcript is not finished yet, a Transcript object otherwise.
-        """
-        raise NotImplemented("Abstract method")
-
-    @classmethod
-    @abc.abstractmethod
-    def store_transcript(cls, db: DatabaseHandler, transcript: Transcript) -> int:
-        """
-        Store transcript to raw download store.
-
-        We could write this directly to "download_texts", but if we decide to reextract everything (after, say, updating
-        an extractor), that "download_texts" row might disappear, so it's safer to just store a raw download on the
-        key-value store as if it was a HTML file or something.
-
-        :param db: Database handler.
-        :param transcript: Transcript object.
-        :return: Download ID for a download that was created.
-        """
-        raise NotImplemented("Abstract method")
-
-
-class DefaultHandler(AbstractHandler):
-
-    @classmethod
-    def fetch_transcript(cls, db: DatabaseHandler, podcast_episode_transcript_fetches_id: int) -> Optional[Transcript]:
-        transcript_fetch = db.find_by_id(
-            table='podcast_episode_transcript_fetches',
-            object_id=podcast_episode_transcript_fetches_id,
-        )
-        if not transcript_fetch:
-            raise McDatabaseNotFoundException(
-                f"Unable to find transcript fetch with ID {podcast_episode_transcript_fetches_id}"
-            )
-        podcast_episodes_id = transcript_fetch['podcast_episodes_id']
-
-        episode = db.find_by_id(table='podcast_episodes', object_id=podcast_episodes_id)
-        if not episode:
-            raise McDatabaseNotFoundException(
-                f"Unable to find podcast episode with ID {podcast_episodes_id}"
-            )
-
-        stories_id = episode['stories_id']
-        speech_operation_id = episode['speech_operation_id']
-
-        if not speech_operation_id:
-            raise McMisconfiguredSpeechAPIException(f"Speech ID for podcast episode {podcast_episodes_id} is unset.")
-
-        try:
-            config = PodcastFetchTranscriptConfig()
-            client = SpeechClient.from_service_account_json(config.gc_auth_json_file())
-            operations_client = OperationsClient(channel=client._transport._grpc_channel)
-        except Exception as ex:
-            raise McMisconfiguredSpeechAPIException(f"Unable to initialize Speech API operations client: {ex}")
-
-        try:
-            operation = operations_client.get_operation(name=speech_operation_id)
-        except InvalidArgument as ex:
-            raise McMisconfiguredSpeechAPIException(f"Invalid operation ID '{speech_operation_id}': {ex}")
-        except NotFound as ex:
-            raise McOperationNotFoundException(f"Operation ID '{speech_operation_id}' was not found: {ex}")
-        except Exception as ex:
-            # On any other errors, raise a hard exception
-            raise McMisconfiguredSpeechAPIException(f"Error while fetching operation ID '{speech_operation_id}': {ex}")
-
-        if not operation:
-            raise McMisconfiguredSpeechAPIException(f"Operation is unset.")
-
-        try:
-            gapic_operation: Operation = from_gapic(
-                operation,
-                operations_client,
-                LongRunningRecognizeResponse,
-                metadata_type=LongRunningRecognizeMetadata,
-            )
-        except Exception as ex:
-            raise McMisconfiguredSpeechAPIException(f"Unable to create GAPIC operation: {ex}")
-
-        log.debug(f"GAPIC operation: {gapic_operation}")
-        log.debug(f"Operation metadata: {gapic_operation.metadata}")
-        log.debug(f"Operation is done: {gapic_operation.done()}")
-        log.debug(f"Operation error: {gapic_operation.done()}")
-
-        try:
-            operation_is_done = gapic_operation.done()
-        except Exception as ex:
-            # 'done' attribute might be gone in a newer version of the Speech API client
-            raise McMisconfiguredSpeechAPIException(
-                f"Unable to test whether operation '{speech_operation_id}' is done: {ex}"
-            )
-
-        if not operation_is_done:
-            log.info(f"Operation '{speech_operation_id}' is still not done.")
-            return None
-
-        utterances = []
-
-        try:
-            for result in gapic_operation.result().results:
-
-                alternatives = []
-                for alternative in result.alternatives:
-                    alternatives.append(
-                        UtteranceAlternative(
-                            text=alternative.transcript.strip(),
-                            confidence=alternative.confidence,
-                        )
-                    )
-
-                utterances.append(
-                    Utterance(
-                        alternatives=alternatives,
-                        bcp47_language_code=result.language_code,
-                    )
-                )
-
-        except GoogleAPICallError as ex:
-            raise McTranscriptionReturnedErrorException(
-                f"Unable to read transcript for operation '{speech_operation_id}': {ex}"
-            )
-
-        except Exception as ex:
-            raise McMisconfiguredSpeechAPIException(
-                f"Unable to read transcript for operation '{speech_operation_id}': {ex}"
-            )
-
-        return Transcript(stories_id=stories_id, utterances=utterances)
-
-    @classmethod
-    def _download_text_from_transcript(cls, transcript: Transcript) -> str:
-        best_utterance_alternatives = []
-        for utterance in transcript.utterances:
-            best_utterance_alternatives.append(utterance.best_alternative.text)
-        text = "\n\n".join(best_utterance_alternatives)
-        return text
-
-    @classmethod
-    def store_transcript(cls, db: DatabaseHandler, transcript: Transcript) -> int:
-        story = db.find_by_id(table='stories', object_id=transcript.stories_id)
-
-        feed = db.query("""
-            SELECT *
-            FROM feeds
-            WHERE feeds_id = (
-                SELECT feeds_id
-                FROM feeds_stories_map
-                WHERE stories_id = %(stories_id)s
-            )
-        """, {
-            'stories_id': transcript.stories_id,
-        }).hash()
-
-        download = create_download_for_new_story(db=db, story=story, feed=feed)
-
-        text = cls._download_text_from_transcript(transcript=transcript)
-
-        # Store as a raw download and then let "extract-and-vector" app "extract" the stored text later
-        store_content(db=db, download=download, content=text)
-
-        return download['downloads_id']
diff --git a/apps/podcast-fetch-transcript/src/python/podcast_fetch_transcript/transcript.py b/apps/podcast-fetch-transcript/src/python/podcast_fetch_transcript/transcript.py
deleted file mode 100644
index edfbd257a4..0000000000
--- a/apps/podcast-fetch-transcript/src/python/podcast_fetch_transcript/transcript.py
+++ /dev/null
@@ -1,40 +0,0 @@
-import dataclasses
-from typing import List
-
-
-@dataclasses.dataclass
-class UtteranceAlternative(object):
-    """One of the alternatives of what might have been said in an utterance."""
-
-    text: str
-    """Utterance text."""
-
-    confidence: float
-    """How confident Speech API is that it got it right."""
-
-
-@dataclasses.dataclass
-class Utterance(object):
-    """A single transcribed utterance (often but not always a single sentence)."""
-
-    alternatives: List[UtteranceAlternative]
-    """Alternatives of what might have been said in an utterance, ordered from the best to the worst guess."""
-
-    bcp47_language_code: str
-    """BCP 47 language code; might be different from what we've passed as the input."""
-
-    @property
-    def best_alternative(self) -> UtteranceAlternative:
-        """Return best alternative for what might have been said in an utterance."""
-        return self.alternatives[0]
-
-
-@dataclasses.dataclass
-class Transcript(object):
-    """A single transcript."""
-
-    stories_id: int
-    """Story ID."""
-
-    utterances: List[Utterance]
-    """List of ordered utterances in a transcript."""
diff --git a/apps/podcast-fetch-transcript/src/requirements.txt b/apps/podcast-fetch-transcript/src/requirements.txt
deleted file mode 100644
index 59e80a7b73..0000000000
--- a/apps/podcast-fetch-transcript/src/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
-google-cloud-speech==2.0.1
diff --git a/apps/podcast-fetch-transcript/tests/data/media-samples b/apps/podcast-fetch-transcript/tests/data/media-samples
deleted file mode 160000
index 45b179fd86..0000000000
--- a/apps/podcast-fetch-transcript/tests/data/media-samples
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 45b179fd867b6031c803cdbb7eddafa7e204d5bd
diff --git a/apps/podcast-fetch-transcript/tests/python/setup_fetch.py b/apps/podcast-fetch-transcript/tests/python/setup_fetch.py
deleted file mode 100644
index 9569d89587..0000000000
--- a/apps/podcast-fetch-transcript/tests/python/setup_fetch.py
+++ /dev/null
@@ -1,182 +0,0 @@
-import abc
-import os
-import random
-import socket
-import time
-from typing import Union
-from unittest import TestCase
-
-from mediawords.db import connect_to_db
-from mediawords.job import JobBroker
-from mediawords.test.db.create import create_test_medium, create_test_feed
-from mediawords.test.hash_server import HashServer
-from mediawords.util.log import create_logger
-
-log = create_logger(__name__)
-
-
-class AbstractFetchTranscriptTestCase(TestCase, metaclass=abc.ABCMeta):
-    __slots__ = [
-        'db',
-        'hs',
-        'stories_id',
-        'transcript_fetches',
-    ]
-
-    @classmethod
-    @abc.abstractmethod
-    def input_media_path(cls) -> str:
-        """Return full path to input media file."""
-        raise NotImplemented("Abstract method")
-
-    @classmethod
-    @abc.abstractmethod
-    def input_media_mime_type(cls) -> str:
-        """Return input media file's MIME type."""
-        raise NotImplemented("Abstract method")
-
-    @classmethod
-    @abc.abstractmethod
-    def story_title_description(cls) -> str:
-        """Return a string to store as both story title and description."""
-        raise NotImplemented("Abstract method")
-
-    @classmethod
-    @abc.abstractmethod
-    def retries_per_step(cls) -> int:
-        """How many retries to do per each local step."""
-        raise NotImplemented("Abstract method")
-
-    @classmethod
-    @abc.abstractmethod
-    def seconds_between_retries(cls) -> float:
-        """How many seconds to wait between retries."""
-        raise NotImplemented("Abstract method")
-
-    def setUp(self) -> None:
-        super().setUp()
-
-        self.db = connect_to_db()
-
-        test_medium = create_test_medium(db=self.db, label='test')
-        test_feed = create_test_feed(db=self.db, label='test', medium=test_medium)
-
-        # Add a story with a random ID to decrease the chance that object in GCS will collide with another test running
-        # at the same time
-        self.stories_id = random.randint(1, 1000000)
-
-        self.db.query("""
-            INSERT INTO stories (
-                stories_id,
-                media_id,
-                url,
-                guid,
-                title,
-                description,
-                publish_date,
-                collect_date,
-                full_text_rss
-            ) VALUES (
-                %(stories_id)s,
-                %(media_id)s,
-                'http://story.test/',
-                'guid://story.test/',
-                'story',
-                'description',
-                '2016-10-15 08:00:00',
-                '2016-10-15 10:00:00',
-                true
-            )
-        """, {
-            'stories_id': self.stories_id,
-            'media_id': test_feed['media_id'],
-        })
-
-        # Create missing partitions for "feeds_stories_map"
-        self.db.query('SELECT create_missing_partitions()')
-
-        self.db.create(
-            table='feeds_stories_map',
-            insert_hash={
-                'feeds_id': int(test_feed['feeds_id']),
-                'stories_id': self.stories_id,
-            }
-        )
-
-        assert os.path.isfile(self.input_media_path()), f"Test media file '{self.input_media_path()}' should exist."
-
-        with open(self.input_media_path(), mode='rb') as f:
-            test_data = f.read()
-
-        # noinspection PyUnusedLocal
-        def __media_callback(request: HashServer.Request) -> Union[str, bytes]:
-            response = "".encode('utf-8')
-            response += "HTTP/1.0 200 OK\r\n".encode('utf-8')
-            response += f"Content-Type: {self.input_media_mime_type()}\r\n".encode('utf-8')
-            response += f"Content-Length: {len(test_data)}\r\n".encode('utf-8')
-            response += "\r\n".encode('utf-8')
-            response += test_data
-            return response
-
-        port = 8080  # Port exposed on docker-compose.tests.yml
-        media_path = '/test_media_file'
-        pages = {
-            media_path: {
-                'callback': __media_callback,
-            }
-        }
-
-        self.hs = HashServer(port=port, pages=pages)
-        self.hs.start()
-
-        # Using our hostname as it will be another container that will be connecting to us
-        media_url = f'http://{socket.gethostname()}:{port}{media_path}'
-
-        self.db.insert(table='story_enclosures', insert_hash={
-            'stories_id': self.stories_id,
-            'url': media_url,
-            'mime_type': self.input_media_mime_type(),
-            'length': len(test_data),
-        })
-
-        # Add a "podcast-fetch-episode" job
-        JobBroker(queue_name='MediaWords::Job::Podcast::FetchEpisode').add_to_queue(stories_id=self.stories_id)
-
-        total_time = int(self.retries_per_step() * self.seconds_between_retries())
-
-        # Wait for "podcast-fetch-episode" to transcode, upload to Google Storage, and write it to "podcast_episodes"
-        episodes = None
-        for x in range(1, self.retries_per_step() + 1):
-            log.info(f"Waiting for episode to appear (#{x})...")
-
-            episodes = self.db.select(table='podcast_episodes', what_to_select='*').hashes()
-            if episodes:
-                log.info(f"Episode is here!")
-                break
-
-            time.sleep(self.seconds_between_retries())
-
-        assert episodes, f"Episode didn't show up in {total_time} seconds."
-
-        # Wait for "podcast-submit-operation" to submit Speech API operation
-        self.transcript_fetches = None
-        for x in range(1, self.retries_per_step() + 1):
-            log.info(f"Waiting for transcript fetch to appear (#{x})...")
-
-            self.transcript_fetches = self.db.select(
-                table='podcast_episode_transcript_fetches',
-                what_to_select='*'
-            ).hashes()
-
-            if self.transcript_fetches:
-                log.info(f"Transcript fetch is here!")
-                break
-
-            time.sleep(self.seconds_between_retries())
-
-        assert self.transcript_fetches, f"Operation didn't show up in {total_time} seconds."
-
-    def tearDown(self) -> None:
-        super().tearDown()
-
-        self.hs.stop()
diff --git a/apps/podcast-fetch-transcript/tests/python/setup_mock_fetch_store.py b/apps/podcast-fetch-transcript/tests/python/setup_mock_fetch_store.py
deleted file mode 100644
index bf8065f670..0000000000
--- a/apps/podcast-fetch-transcript/tests/python/setup_mock_fetch_store.py
+++ /dev/null
@@ -1,57 +0,0 @@
-import abc
-from unittest import TestCase
-
-from mediawords.db import connect_to_db
-from mediawords.test.db.create import create_test_medium, create_test_feed, create_test_story
-from mediawords.util.log import create_logger
-
-log = create_logger(__name__)
-
-
-class AbstractMockFetchStoreTestCase(TestCase, metaclass=abc.ABCMeta):
-    MOCK_SPEECH_OPERATION_ID = 'foo'
-
-    __slots__ = [
-        'db',
-        'enclosure',
-        'episode',
-        'transcript_fetch',
-        'podcast_episode_transcript_fetches_id',
-    ]
-
-    def setUp(self) -> None:
-        super().setUp()
-
-        self.db = connect_to_db()
-
-        test_medium = create_test_medium(db=self.db, label='test')
-        test_feed = create_test_feed(db=self.db, label='test', medium=test_medium)
-        test_story = create_test_story(db=self.db, feed=test_feed, label='test')
-
-        self.enclosure = self.db.insert(table='story_enclosures', insert_hash={
-            'stories_id': test_story['stories_id'],
-            'url': 'foo',
-            'mime_type': 'foo',
-            'length': 3,
-        })
-
-        self.episode = self.db.insert(table='podcast_episodes', insert_hash={
-            'stories_id': test_story['stories_id'],
-            'story_enclosures_id': self.enclosure['story_enclosures_id'],
-            'gcs_uri': 'gs://test',
-            'duration': 3,
-            'codec': 'FLAC',
-            'sample_rate': 44100,
-            'bcp47_language_code': 'en-US',
-            'speech_operation_id': self.MOCK_SPEECH_OPERATION_ID,
-        })
-
-        self.transcript_fetch = self.db.query("""
-            INSERT INTO podcast_episode_transcript_fetches (podcast_episodes_id, add_to_queue_at)
-            VALUES (%(podcast_episodes_id)s, NOW())
-            RETURNING *
-        """, {
-            'podcast_episodes_id': self.episode['podcast_episodes_id'],
-        }).hash()
-
-        self.podcast_episode_transcript_fetches_id = self.transcript_fetch['podcast_episode_transcript_fetches_id']
diff --git a/apps/podcast-fetch-transcript/tests/python/test_fetch_long_audio.py b/apps/podcast-fetch-transcript/tests/python/test_fetch_long_audio.py
deleted file mode 100644
index ef246d5bf8..0000000000
--- a/apps/podcast-fetch-transcript/tests/python/test_fetch_long_audio.py
+++ /dev/null
@@ -1,68 +0,0 @@
-import os
-import time
-
-import pytest
-
-from mediawords.util.log import create_logger
-
-from podcast_fetch_transcript.handler import DefaultHandler
-
-from .setup_fetch import AbstractFetchTranscriptTestCase
-
-log = create_logger(__name__)
-
-
-@pytest.mark.skipif('MC_PODCAST_FETCH_TRANSCRIPT_RUN_COSTLY_TEST' not in os.environ,
-                    reason="Costly; each run costs about 60 / 4 * 0.009 = $0.04")
-class LongAudioTestCase(AbstractFetchTranscriptTestCase):
-    """Test the full chain against a long audio file to try out whether podcast-fetch-transcript manages to back off."""
-
-    @classmethod
-    def input_media_path(cls) -> str:
-        return '/opt/mediacloud/tests/data/media-samples/samples/nixon_speech-vorbis-1m.ogg'
-
-    @classmethod
-    def input_media_mime_type(cls) -> str:
-        return 'audio/ogg'
-
-    @classmethod
-    def story_title_description(cls) -> str:
-        return 'Resignation speech of United States President Richard Nixon'
-
-    @classmethod
-    def retries_per_step(cls) -> int:
-        # Try more often and wait for longer as this is a bigger file
-        return 60
-
-    @classmethod
-    def seconds_between_retries(cls) -> float:
-        return 1.0
-
-    def test_long_audio(self):
-        transcript = None
-
-        handler = DefaultHandler()
-
-        # Input audio file is 1m0s, so wait for at least two minutes
-        for x in range(1, 12 + 1):
-            log.info(f"Waiting for transcript to be finished (#{x})...")
-
-            podcast_episode_transcript_fetches_id = self.transcript_fetches[0]['podcast_episode_transcript_fetches_id']
-            transcript = handler.fetch_transcript(
-                db=self.db,
-                podcast_episode_transcript_fetches_id=podcast_episode_transcript_fetches_id
-            )
-
-            if transcript:
-                log.info("Transcript is here!")
-                break
-
-            time.sleep(5)
-
-        print(transcript)
-
-        assert transcript
-        assert transcript.stories_id
-        assert len(transcript.utterances) > 0
-        assert len(transcript.utterances[0].alternatives) > 0
-        assert 'evening' in transcript.utterances[0].alternatives[0].text.lower()
diff --git a/apps/podcast-fetch-transcript/tests/python/test_fetch_store_full_chain.py b/apps/podcast-fetch-transcript/tests/python/test_fetch_store_full_chain.py
deleted file mode 100644
index 1e6933a36d..0000000000
--- a/apps/podcast-fetch-transcript/tests/python/test_fetch_store_full_chain.py
+++ /dev/null
@@ -1,70 +0,0 @@
-import time
-
-from mediawords.dbi.downloads.store import fetch_content
-from mediawords.util.log import create_logger
-from podcast_fetch_transcript.handler import DefaultHandler
-
-from .setup_fetch import AbstractFetchTranscriptTestCase
-
-log = create_logger(__name__)
-
-
-class FullChainTestCase(AbstractFetchTranscriptTestCase):
-    """Test the full chain against a small audio file."""
-
-    @classmethod
-    def input_media_path(cls) -> str:
-        # Run the test with AAC file to test out both transcoding to FLAC and whether Speech API can transcribe audio
-        # files after lossy -> lossless transcoding
-        return '/opt/mediacloud/tests/data/media-samples/samples/kim_kardashian-aac.m4a'
-
-    @classmethod
-    def input_media_mime_type(cls) -> str:
-        return 'audio/mp4'
-
-    @classmethod
-    def story_title_description(cls) -> str:
-        # 'label' is important as it will be stored in both stories.title and stories.description, which in turn will be
-        # used to guess the probable language of the podcast episode
-        return 'keeping up with Kardashians'
-
-    @classmethod
-    def retries_per_step(cls) -> int:
-        return 120
-
-    @classmethod
-    def seconds_between_retries(cls) -> float:
-        return 0.5
-
-    def test_full_chain(self):
-        transcript = None
-
-        handler = DefaultHandler()
-
-        for x in range(1, 60 + 1):
-            log.info(f"Waiting for transcript to be finished (#{x})...")
-
-            podcast_episode_transcript_fetches_id = self.transcript_fetches[0]['podcast_episode_transcript_fetches_id']
-            transcript = handler.fetch_transcript(
-                db=self.db,
-                podcast_episode_transcript_fetches_id=podcast_episode_transcript_fetches_id
-            )
-            if transcript:
-                log.info("Transcript is here!")
-                break
-
-            time.sleep(2)
-
-        assert transcript
-        assert transcript.stories_id
-        assert len(transcript.utterances) == 1
-        assert len(transcript.utterances[0].alternatives) == 1
-        assert 'kim kardashian' in transcript.utterances[0].alternatives[0].text.lower()
-
-        downloads_id = handler.store_transcript(db=self.db, transcript=transcript)
-
-        download = self.db.find_by_id(table='downloads', object_id=downloads_id)
-
-        raw_download = fetch_content(db=self.db, download=download)
-        assert raw_download
-        assert 'kim kardashian' in raw_download.lower()
diff --git a/apps/podcast-fetch-transcript/tests/python/test_mock_error.py b/apps/podcast-fetch-transcript/tests/python/test_mock_error.py
deleted file mode 100644
index 31033cbf86..0000000000
--- a/apps/podcast-fetch-transcript/tests/python/test_mock_error.py
+++ /dev/null
@@ -1,45 +0,0 @@
-from typing import Optional
-
-import pytest
-
-from mediawords.db import DatabaseHandler
-
-from podcast_fetch_transcript.exceptions import McPodcastFetchTranscriptHardException
-from podcast_fetch_transcript.fetch_store import fetch_store_transcript
-from podcast_fetch_transcript.handler import AbstractHandler
-from podcast_fetch_transcript.transcript import Transcript
-
-from .setup_mock_fetch_store import AbstractMockFetchStoreTestCase
-
-
-class MockTranscriptErrorWithExceptionHandler(AbstractHandler):
-    """Mock handler that fails the transcription with soft error."""
-
-    @classmethod
-    def fetch_transcript(cls, db: DatabaseHandler, podcast_episode_transcript_fetches_id: int) -> Optional[Transcript]:
-        raise McPodcastFetchTranscriptHardException("Some sort of a permanent problem")
-
-    @classmethod
-    def store_transcript(cls, db: DatabaseHandler, transcript: Transcript) -> int:
-        raise NotImplemented("Shouldn't be called.")
-
-
-class MockErrorTestCase(AbstractMockFetchStoreTestCase):
-
-    def test_error(self):
-        handler = MockTranscriptErrorWithExceptionHandler()
-
-        with pytest.raises(McPodcastFetchTranscriptHardException):
-            fetch_store_transcript(
-                db=self.db,
-                podcast_episode_transcript_fetches_id=self.podcast_episode_transcript_fetches_id,
-                handler=handler,
-            )
-
-        transcript_fetches = self.db.select(table='podcast_episode_transcript_fetches', what_to_select='*').hashes()
-        assert len(transcript_fetches) == 1
-
-        transcript_fetch = transcript_fetches[0]
-        assert transcript_fetch['fetched_at']
-        assert transcript_fetch['result'] == 'error'
-        assert 'permanent problem' in transcript_fetch['error_message']
diff --git a/apps/podcast-fetch-transcript/tests/python/test_mock_not_done.py b/apps/podcast-fetch-transcript/tests/python/test_mock_not_done.py
deleted file mode 100644
index 1d4e7cf1f9..0000000000
--- a/apps/podcast-fetch-transcript/tests/python/test_mock_not_done.py
+++ /dev/null
@@ -1,53 +0,0 @@
-from typing import Optional
-
-from mediawords.db import DatabaseHandler
-
-from podcast_fetch_transcript.fetch_store import fetch_store_transcript
-from podcast_fetch_transcript.handler import AbstractHandler
-from podcast_fetch_transcript.transcript import Transcript
-
-from .setup_mock_fetch_store import AbstractMockFetchStoreTestCase
-
-
-class MockTranscriptNotDoneHandler(AbstractHandler):
-    """Mock handler that reports that the transcript is not yet done."""
-
-    @classmethod
-    def fetch_transcript(cls, db: DatabaseHandler, podcast_episode_transcript_fetches_id: int) -> Optional[Transcript]:
-        return None
-
-    @classmethod
-    def store_transcript(cls, db: DatabaseHandler, transcript: Transcript) -> int:
-        raise NotImplemented("Shouldn't be called.")
-
-
-class MockFailedTestCase(AbstractMockFetchStoreTestCase):
-
-    def test_not_done(self):
-        handler = MockTranscriptNotDoneHandler()
-
-        stories_id = fetch_store_transcript(
-            db=self.db,
-            podcast_episode_transcript_fetches_id=self.podcast_episode_transcript_fetches_id,
-            handler=handler,
-        )
-        assert stories_id is None
-
-        transcript_fetches = self.db.query("""
-            SELECT *
-            FROM podcast_episode_transcript_fetches
-            ORDER BY podcast_episode_transcript_fetches_id
-        """).hashes()
-        assert len(transcript_fetches) == 2, "One fetch that's still in progress, another one added for the future."
-
-        transcript_fetch_in_progress = transcript_fetches[0]
-        assert transcript_fetch_in_progress['fetched_at']
-        assert transcript_fetch_in_progress['result'] == 'in_progress'
-        assert not transcript_fetch_in_progress['error_message']
-
-        transcript_fetch_readded = transcript_fetches[1]
-        assert transcript_fetch_readded['add_to_queue_at']
-        assert not transcript_fetch_readded['added_to_queue_at']
-        assert not transcript_fetch_readded['fetched_at']
-        assert not transcript_fetch_readded['result']
-        assert not transcript_fetch_readded['error_message']
diff --git a/apps/podcast-fetch-transcript/tests/python/test_mock_success.py b/apps/podcast-fetch-transcript/tests/python/test_mock_success.py
deleted file mode 100644
index d388b58f0b..0000000000
--- a/apps/podcast-fetch-transcript/tests/python/test_mock_success.py
+++ /dev/null
@@ -1,55 +0,0 @@
-from typing import Optional
-
-from mediawords.db import DatabaseHandler
-from podcast_fetch_transcript.fetch_store import fetch_store_transcript
-
-from podcast_fetch_transcript.handler import AbstractHandler
-from podcast_fetch_transcript.transcript import Transcript, Utterance, UtteranceAlternative
-
-from .setup_mock_fetch_store import AbstractMockFetchStoreTestCase
-
-
-class MockTranscriptSuccessHandler(AbstractHandler):
-    """Mock handler that fetches the transcription successfully."""
-
-    @classmethod
-    def fetch_transcript(cls, db: DatabaseHandler, podcast_episode_transcript_fetches_id: int) -> Optional[Transcript]:
-        return Transcript(
-            stories_id=42,
-            utterances=[
-                Utterance(
-                    alternatives=[
-                        UtteranceAlternative(
-                            text='Kim Kardashian.',
-                            confidence=1.00,
-                        )
-                    ],
-                    bcp47_language_code='en-US',
-                ),
-            ]
-        )
-
-    @classmethod
-    def store_transcript(cls, db: DatabaseHandler, transcript: Transcript) -> int:
-        return transcript.stories_id
-
-
-class MockSuccessTestCase(AbstractMockFetchStoreTestCase):
-
-    def test_success(self):
-        handler = MockTranscriptSuccessHandler()
-
-        stories_id = fetch_store_transcript(
-            db=self.db,
-            podcast_episode_transcript_fetches_id=self.podcast_episode_transcript_fetches_id,
-            handler=handler,
-        )
-        assert stories_id
-
-        transcript_fetches = self.db.select(table='podcast_episode_transcript_fetches', what_to_select='*').hashes()
-        assert len(transcript_fetches) == 1
-
-        transcript_fetch = transcript_fetches[0]
-        assert transcript_fetch['fetched_at']
-        assert transcript_fetch['result'] == 'success'
-        assert not transcript_fetch['error_message']
diff --git a/apps/podcast-poll-due-operations/.idea/externalDependencies.xml b/apps/podcast-poll-due-operations/.idea/externalDependencies.xml
deleted file mode 100644
index 7872ffbcf2..0000000000
--- a/apps/podcast-poll-due-operations/.idea/externalDependencies.xml
+++ /dev/null
@@ -1,6 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="ExternalDependencies">
-    <plugin id="Docker" />
-  </component>
-</project>
\ No newline at end of file
diff --git a/apps/podcast-poll-due-operations/.idea/inspectionProfiles/profiles_settings.xml b/apps/podcast-poll-due-operations/.idea/inspectionProfiles/profiles_settings.xml
deleted file mode 100644
index 105ce2da2d..0000000000
--- a/apps/podcast-poll-due-operations/.idea/inspectionProfiles/profiles_settings.xml
+++ /dev/null
@@ -1,6 +0,0 @@
-<component name="InspectionProjectProfileManager">
-  <settings>
-    <option name="USE_PROJECT_PROFILE" value="false" />
-    <version value="1.0" />
-  </settings>
-</component>
\ No newline at end of file
diff --git a/apps/podcast-poll-due-operations/.idea/misc.xml b/apps/podcast-poll-due-operations/.idea/misc.xml
deleted file mode 100644
index 46a8a5a238..0000000000
--- a/apps/podcast-poll-due-operations/.idea/misc.xml
+++ /dev/null
@@ -1,7 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="JavaScriptSettings">
-    <option name="languageLevel" value="ES6" />
-  </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.8.5 Docker Compose (podcast-poll-due-operations at [/home/pypt/m/apps/podcast-poll-due-operations/docker-compose.tests.yml])" project-jdk-type="Python SDK" />
-</project>
\ No newline at end of file
diff --git a/apps/podcast-poll-due-operations/.idea/modules.xml b/apps/podcast-poll-due-operations/.idea/modules.xml
deleted file mode 100644
index d113be0932..0000000000
--- a/apps/podcast-poll-due-operations/.idea/modules.xml
+++ /dev/null
@@ -1,8 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="ProjectModuleManager">
-    <modules>
-      <module fileurl="file://$PROJECT_DIR$/.idea/podcast-poll-due-operations.iml" filepath="$PROJECT_DIR$/.idea/podcast-poll-due-operations.iml" />
-    </modules>
-  </component>
-</project>
\ No newline at end of file
diff --git a/apps/podcast-poll-due-operations/.idea/podcast-poll-due-operations.iml b/apps/podcast-poll-due-operations/.idea/podcast-poll-due-operations.iml
deleted file mode 100644
index 83a606a6bd..0000000000
--- a/apps/podcast-poll-due-operations/.idea/podcast-poll-due-operations.iml
+++ /dev/null
@@ -1,11 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<module type="PYTHON_MODULE" version="4">
-  <component name="NewModuleRootManager">
-    <content url="file://$MODULE_DIR$" />
-    <orderEntry type="jdk" jdkName="Remote Python 3.8.5 Docker Compose (podcast-poll-due-operations at [/home/pypt/m/apps/podcast-poll-due-operations/docker-compose.tests.yml])" jdkType="Python SDK" />
-    <orderEntry type="sourceFolder" forTests="false" />
-  </component>
-  <component name="TestRunnerService">
-    <option name="PROJECT_TEST_RUNNER" value="pytest" />
-  </component>
-</module>
\ No newline at end of file
diff --git a/apps/podcast-poll-due-operations/.idea/runConfigurations/Dockerfile.xml b/apps/podcast-poll-due-operations/.idea/runConfigurations/Dockerfile.xml
deleted file mode 100644
index e6a39721ac..0000000000
--- a/apps/podcast-poll-due-operations/.idea/runConfigurations/Dockerfile.xml
+++ /dev/null
@@ -1,17 +0,0 @@
-<component name="ProjectRunConfigurationManager">
-  <configuration default="false" name="Dockerfile" type="docker-deploy" factoryName="dockerfile" server-name="mediacloud">
-    <deployment type="dockerfile">
-      <settings>
-        <option name="buildCliOptions" value="--cache-from gcr.io/mcback/podcast-poll-due-operations:latest" />
-        <option name="buildOnly" value="true" />
-        <option name="command" value="" />
-        <option name="containerName" value="" />
-        <option name="entrypoint" value="" />
-        <option name="imageTag" value="gcr.io/mcback/podcast-poll-due-operations:latest" />
-        <option name="commandLineOptions" value="" />
-        <option name="sourceFilePath" value="Dockerfile" />
-      </settings>
-    </deployment>
-    <method v="2" />
-  </configuration>
-</component>
\ No newline at end of file
diff --git a/apps/podcast-poll-due-operations/Dockerfile b/apps/podcast-poll-due-operations/Dockerfile
deleted file mode 100644
index 9de70b053f..0000000000
--- a/apps/podcast-poll-due-operations/Dockerfile
+++ /dev/null
@@ -1,17 +0,0 @@
-#
-# Poll database for operations which should be done by now, add transcription fetch for due operations
-#
-
-FROM gcr.io/mcback/common:latest
-
-# Copy sources
-COPY src/ /opt/mediacloud/src/podcast-poll-due-operations/
-ENV PERL5LIB="/opt/mediacloud/src/podcast-poll-due-operations/perl:${PERL5LIB}" \
-    PYTHONPATH="/opt/mediacloud/src/podcast-poll-due-operations/python:${PYTHONPATH}"
-
-# Copy worker script
-COPY bin /opt/mediacloud/bin
-
-USER mediacloud
-
-CMD ["podcast_poll_due_operations_worker.py"]
diff --git a/apps/podcast-poll-due-operations/bin/podcast_poll_due_operations_worker.py b/apps/podcast-poll-due-operations/bin/podcast_poll_due_operations_worker.py
deleted file mode 100755
index a87a2763ad..0000000000
--- a/apps/podcast-poll-due-operations/bin/podcast_poll_due_operations_worker.py
+++ /dev/null
@@ -1,24 +0,0 @@
-#!/usr/bin/env python3
-
-from mediawords.job import JobBroker
-from mediawords.util.process import fatal_error
-
-from podcast_poll_due_operations.due_operations import poll_for_due_operations, AbstractFetchTranscriptQueue
-
-
-class JobBrokerFetchTranscriptQueue(AbstractFetchTranscriptQueue):
-    """Add fetch transcript jobs to job broker's queue."""
-
-    def add_to_queue(self, podcast_episode_transcript_fetches_id: int) -> None:
-        JobBroker(queue_name='MediaWords::Job::Podcast::FetchTranscript').add_to_queue(
-            podcast_episode_transcript_fetches_id=podcast_episode_transcript_fetches_id,
-        )
-
-
-if __name__ == '__main__':
-    try:
-        fetch_transcript_queue = JobBrokerFetchTranscriptQueue()
-        poll_for_due_operations(fetch_transcript_queue=fetch_transcript_queue)
-    except Exception as ex:
-        # Hard and unknown errors (no soft errors here)
-        fatal_error(f"Unable to poll for due operations: {ex}")
diff --git a/apps/podcast-poll-due-operations/docker-compose.tests.yml b/apps/podcast-poll-due-operations/docker-compose.tests.yml
deleted file mode 100644
index 912d4cc95e..0000000000
--- a/apps/podcast-poll-due-operations/docker-compose.tests.yml
+++ /dev/null
@@ -1,54 +0,0 @@
-version: "3.7"
-
-services:
-
-    podcast-poll-due-operations:
-        image: gcr.io/mcback/podcast-poll-due-operations:latest
-        init: true
-        stop_signal: SIGKILL
-        volumes:
-            - type: bind
-              source: ./bin/
-              target: /opt/mediacloud/bin/
-            - type: bind
-              source: ./src/
-              target: /opt/mediacloud/src/podcast-poll-due-operations/
-            - type: bind
-              source: ./tests/
-              target: /opt/mediacloud/tests/
-            - type: bind
-              source: ./../common/src/
-              target: /opt/mediacloud/src/common/
-        depends_on:
-            - postgresql-pgbouncer
-            # We don't need "rabbitmq-server" to run tests
-
-    postgresql-pgbouncer:
-        image: gcr.io/mcback/postgresql-pgbouncer:latest
-        init: true
-        stop_signal: SIGKILL
-        expose:
-            - 6432
-        volumes:
-            - type: bind
-              source: ./../postgresql-pgbouncer/conf/
-              target: /etc/pgbouncer/
-        depends_on:
-            - postgresql-server
-
-    postgresql-server:
-        image: gcr.io/mcback/postgresql-server:latest
-        init: true
-        stop_signal: SIGKILL
-        expose:
-            - 5432
-        volumes:
-            - type: bind
-              source: ./../postgresql-server/bin/
-              target: /opt/mediacloud/bin/
-            - type: bind
-              source: ./../postgresql-server/schema/
-              target: /opt/mediacloud/schema/
-            - type: bind
-              source: ./../postgresql-server/conf/
-              target: /etc/postgresql/11/main/
diff --git a/apps/podcast-poll-due-operations/src/python/podcast_poll_due_operations/due_operations.py b/apps/podcast-poll-due-operations/src/python/podcast_poll_due_operations/due_operations.py
deleted file mode 100644
index ea82dc8949..0000000000
--- a/apps/podcast-poll-due-operations/src/python/podcast_poll_due_operations/due_operations.py
+++ /dev/null
@@ -1,112 +0,0 @@
-import abc
-import time
-
-from mediawords.db import connect_to_db
-from mediawords.util.log import create_logger
-
-from podcast_poll_due_operations.exceptions import McJobBrokerErrorException
-
-log = create_logger(__name__)
-
-
-class AbstractFetchTranscriptQueue(object, metaclass=abc.ABCMeta):
-    """
-    Abstract class for adding a story ID to the "podcast-fetch-transcript" queue.
-
-    Useful for testing as having such a class can help us find out whether stories get added to the actual job queue.
-    """
-
-    @abc.abstractmethod
-    def add_to_queue(self, podcast_episode_transcript_fetches_id: int) -> None:
-        """
-        Add story ID to "podcast-fetch-transcript" job queue.
-
-        :param podcast_episode_transcript_fetches_id: Transcript fetch ID.
-        """
-        raise NotImplemented("Abstract method")
-
-
-def poll_for_due_operations(fetch_transcript_queue: AbstractFetchTranscriptQueue,
-                            stop_after_first_empty_chunk: bool = False,
-                            wait_after_empty_poll: int = 30,
-                            stories_chunk_size: int = 100) -> None:
-    """
-    Continuously poll for due operations, add such operations to "podcast-fetch-transcript" queue.
-
-    Never returns, unless 'stop_after_first_empty_chunk' is set.
-
-    :param fetch_transcript_queue: Queue helper object to use for adding a story ID to "podcast-fetch-transcript"
-                                      queue (useful for testing).
-    :param stop_after_first_empty_chunk: If True, stop after the first attempt to fetch a chunk of due story IDs comes
-                                         out empty (useful for testing).
-    :param wait_after_empty_poll: Seconds to wait after there were no due story IDs found.
-    :param stories_chunk_size: Max. due story IDs to fetch in one go; the chunk will be deleted + returned in a
-                               transaction, which will get reverted if RabbitMQ fails, so we don't want to
-                               hold that transaction for too long.
-    """
-
-    if not fetch_transcript_queue:
-        raise McJobBrokerErrorException(f"Fetch transcript queue object is unset.")
-
-    while True:
-
-        db = connect_to_db()
-
-        log.info("Polling...")
-        due_operations = db.query("""
-            SELECT
-                podcast_episode_transcript_fetches_id,
-                add_to_queue_at
-            FROM podcast_episode_transcript_fetches
-            
-            -- Transcript fetch is due
-            WHERE add_to_queue_at <= NOW()
-            
-            -- Transcript fetch wasn't added to the job broker's queue yet
-              AND podcast_episode_transcript_was_added_to_queue(added_to_queue_at) = 'f'
-            
-            -- Get the oldest operations first
-            ORDER BY add_to_queue_at
-
-            -- Don't fetch too much of stories at once
-            LIMIT %(stories_chunk_size)s
-        """, {
-            'stories_chunk_size': stories_chunk_size,
-        }).hashes()
-
-        if due_operations:
-
-            try:
-                log.info(f"Adding {len(due_operations)} due operations to the transcription fetch queue...")
-
-                for operation in due_operations:
-                    podcast_episode_transcript_fetches_id = operation['podcast_episode_transcript_fetches_id']
-                    log.debug(
-                        f"Adding fetch ID {podcast_episode_transcript_fetches_id} to the transcription fetch queue..."
-                    )
-                    fetch_transcript_queue.add_to_queue(
-                        podcast_episode_transcript_fetches_id=podcast_episode_transcript_fetches_id,
-                    )
-
-                    # Update "added_to_queue_at" individually in case RabbitMQ decides to fail on us
-                    db.query("""
-                        UPDATE podcast_episode_transcript_fetches
-                        SET added_to_queue_at = NOW()
-                        WHERE podcast_episode_transcript_fetches_id = %(podcast_episode_transcript_fetches_id)s
-                    """, {
-                        'podcast_episode_transcript_fetches_id': podcast_episode_transcript_fetches_id,
-                    })
-
-                log.info(f"Done adding {len(due_operations)} due operations to the transcription fetch queue")
-            except Exception as ex:
-
-                raise McJobBrokerErrorException(f"Unable to add one or more stories the the job queue: {ex}")
-
-        else:
-
-            if stop_after_first_empty_chunk:
-                log.info(f"No due story IDs found, stopping...")
-                break
-            else:
-                log.info(f"No due story IDs found, waiting for {wait_after_empty_poll} seconds...")
-                time.sleep(wait_after_empty_poll)
diff --git a/apps/podcast-poll-due-operations/src/python/podcast_poll_due_operations/exceptions.py b/apps/podcast-poll-due-operations/src/python/podcast_poll_due_operations/exceptions.py
deleted file mode 100644
index 1bc47e477a..0000000000
--- a/apps/podcast-poll-due-operations/src/python/podcast_poll_due_operations/exceptions.py
+++ /dev/null
@@ -1,13 +0,0 @@
-class McPodcastPollDueOperationsHardException(Exception):
-    """Hard errors exception."""
-    pass
-
-
-class McDatabaseErrorException(McPodcastPollDueOperationsHardException):
-    """Exception thrown when we encounter a database error."""
-    pass
-
-
-class McJobBrokerErrorException(McPodcastPollDueOperationsHardException):
-    """Exception thrown when we encounter a job broker (RabbitMQ) error."""
-    pass
diff --git a/apps/podcast-poll-due-operations/tests/python/setup_due_operation.py b/apps/podcast-poll-due-operations/tests/python/setup_due_operation.py
deleted file mode 100644
index 5fcc58e52d..0000000000
--- a/apps/podcast-poll-due-operations/tests/python/setup_due_operation.py
+++ /dev/null
@@ -1,55 +0,0 @@
-import abc
-from unittest import TestCase
-
-from mediawords.db import connect_to_db
-from mediawords.test.db.create import create_test_medium, create_test_feed, create_test_story
-
-
-class SetupTestOperation(TestCase, metaclass=abc.ABCMeta):
-    __slots__ = [
-        'db',
-        'test_medium',
-        'test_feed',
-        'story',
-        'stories_id',
-    ]
-
-    def setUp(self):
-        self.db = connect_to_db()
-
-        self.test_medium = create_test_medium(db=self.db, label='test')
-        self.test_feed = create_test_feed(db=self.db, label='test', medium=self.test_medium)
-        self.story = create_test_story(db=self.db, label='test', feed=self.test_feed)
-
-        stories_id = self.story['stories_id']
-
-        enclosure = self.db.insert(table='story_enclosures', insert_hash={
-            'stories_id': stories_id,
-            # URL doesn't really matter as we won't be fetching it
-            'url': 'http://example.com/',
-            'mime_type': 'audio/mpeg',
-            'length': 100000,
-        })
-
-        episode = self.db.insert(table='podcast_episodes', insert_hash={
-            'stories_id': stories_id,
-            'story_enclosures_id': enclosure['story_enclosures_id'],
-            'gcs_uri': 'gs://whatever',
-            'duration': 1,
-            'codec': 'MP3',
-            'sample_rate': 44100,
-            'bcp47_language_code': 'en-US',
-            'speech_operation_id': 'foo',
-        })
-
-        self.db.query("""
-            INSERT INTO podcast_episode_transcript_fetches (
-                podcast_episodes_id,
-                add_to_queue_at
-            ) VALUES (
-                %(podcast_episodes_id)s,
-                NOW()
-            )
-        """, {
-            'podcast_episodes_id': episode['podcast_episodes_id'],
-        })
diff --git a/apps/podcast-poll-due-operations/tests/python/test_due_operations.py b/apps/podcast-poll-due-operations/tests/python/test_due_operations.py
deleted file mode 100644
index d121178e03..0000000000
--- a/apps/podcast-poll-due-operations/tests/python/test_due_operations.py
+++ /dev/null
@@ -1,40 +0,0 @@
-from podcast_poll_due_operations.due_operations import poll_for_due_operations, AbstractFetchTranscriptQueue
-
-from .setup_due_operation import SetupTestOperation
-
-
-class MockCounterFetchTranscriptQueue(AbstractFetchTranscriptQueue):
-    __slots__ = [
-        'story_count',
-    ]
-
-    def __init__(self):
-        self.story_count = 0
-
-    def add_to_queue(self, podcast_episode_transcript_fetches_id: int) -> None:
-        self.story_count += 1
-
-
-class TestPollForDueOperations(SetupTestOperation):
-
-    def test_poll_for_due_operations(self):
-        """Simple test."""
-
-        fetch_transcript_queue = MockCounterFetchTranscriptQueue()
-
-        poll_for_due_operations(
-            fetch_transcript_queue=fetch_transcript_queue,
-            stop_after_first_empty_chunk=True,
-        )
-
-        all_fetches = self.db.select(
-            table='podcast_episode_transcript_fetches',
-            what_to_select='*',
-        ).hashes()
-
-        assert len(all_fetches) == 1, "The fetch should have been kept in the table."
-        fetch = all_fetches[0]
-
-        assert fetch['added_to_queue_at'], "Timestamp for when the fetch as added to the queue should be set."
-
-        assert fetch_transcript_queue.story_count == 1, "A single story should have been added to the fetch queue."
diff --git a/apps/podcast-poll-due-operations/tests/python/test_failing_job_broker.py b/apps/podcast-poll-due-operations/tests/python/test_failing_job_broker.py
deleted file mode 100644
index a0e6897f80..0000000000
--- a/apps/podcast-poll-due-operations/tests/python/test_failing_job_broker.py
+++ /dev/null
@@ -1,36 +0,0 @@
-import pytest
-
-from podcast_poll_due_operations.due_operations import poll_for_due_operations, AbstractFetchTranscriptQueue
-from podcast_poll_due_operations.exceptions import McJobBrokerErrorException
-
-from .setup_due_operation import SetupTestOperation
-
-
-class MockFailingFetchTranscriptQueue(AbstractFetchTranscriptQueue):
-
-    def add_to_queue(self, podcast_episode_transcript_fetches_id: int) -> None:
-        raise Exception("Job broker is down")
-
-
-class TestFailingJobBroker(SetupTestOperation):
-
-    def test_failing_job_broker(self):
-        """Test what happens if the job broker fails."""
-
-        fetch_transcript_queue = MockFailingFetchTranscriptQueue()
-
-        with pytest.raises(McJobBrokerErrorException):
-            poll_for_due_operations(
-                fetch_transcript_queue=fetch_transcript_queue,
-                stop_after_first_empty_chunk=True,
-            )
-
-        all_fetches = self.db.select(
-            table='podcast_episode_transcript_fetches',
-            what_to_select='*',
-        ).hashes()
-
-        assert len(all_fetches) == 1, "The fetch should have been kept in the table."
-        fetch = all_fetches[0]
-
-        assert not fetch['added_to_queue_at'], "Timestamp for when the fetch as added to the queue should be empty."
diff --git a/apps/podcast-submit-operation/.idea/externalDependencies.xml b/apps/podcast-submit-operation/.idea/externalDependencies.xml
deleted file mode 100644
index 7872ffbcf2..0000000000
--- a/apps/podcast-submit-operation/.idea/externalDependencies.xml
+++ /dev/null
@@ -1,6 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="ExternalDependencies">
-    <plugin id="Docker" />
-  </component>
-</project>
\ No newline at end of file
diff --git a/apps/podcast-submit-operation/.idea/inspectionProfiles/profiles_settings.xml b/apps/podcast-submit-operation/.idea/inspectionProfiles/profiles_settings.xml
deleted file mode 100644
index 105ce2da2d..0000000000
--- a/apps/podcast-submit-operation/.idea/inspectionProfiles/profiles_settings.xml
+++ /dev/null
@@ -1,6 +0,0 @@
-<component name="InspectionProjectProfileManager">
-  <settings>
-    <option name="USE_PROJECT_PROFILE" value="false" />
-    <version value="1.0" />
-  </settings>
-</component>
\ No newline at end of file
diff --git a/apps/podcast-submit-operation/.idea/misc.xml b/apps/podcast-submit-operation/.idea/misc.xml
deleted file mode 100644
index 06b8bbff3f..0000000000
--- a/apps/podcast-submit-operation/.idea/misc.xml
+++ /dev/null
@@ -1,7 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="JavaScriptSettings">
-    <option name="languageLevel" value="ES6" />
-  </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.8.5 Docker Compose (podcast-submit-operation at [/home/pypt/m/apps/podcast-submit-operation/docker-compose.tests.yml])" project-jdk-type="Python SDK" />
-</project>
\ No newline at end of file
diff --git a/apps/podcast-submit-operation/.idea/podcast-submit-operation.iml b/apps/podcast-submit-operation/.idea/podcast-submit-operation.iml
deleted file mode 100644
index 1c3aa105bd..0000000000
--- a/apps/podcast-submit-operation/.idea/podcast-submit-operation.iml
+++ /dev/null
@@ -1,14 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<module type="PYTHON_MODULE" version="4">
-  <component name="NewModuleRootManager">
-    <content url="file://$MODULE_DIR$" />
-    <orderEntry type="jdk" jdkName="Remote Python 3.8.5 Docker Compose (podcast-submit-operation at [/home/pypt/m/apps/podcast-submit-operation/docker-compose.tests.yml])" jdkType="Python SDK" />
-    <orderEntry type="sourceFolder" forTests="false" />
-  </component>
-  <component name="PackageRequirementsSettings">
-    <option name="requirementsPath" value="$MODULE_DIR$/src/requirements.txt" />
-  </component>
-  <component name="TestRunnerService">
-    <option name="PROJECT_TEST_RUNNER" value="pytest" />
-  </component>
-</module>
\ No newline at end of file
diff --git a/apps/podcast-submit-operation/.idea/runConfigurations/Dockefile.xml b/apps/podcast-submit-operation/.idea/runConfigurations/Dockefile.xml
deleted file mode 100644
index b5e047b7b0..0000000000
--- a/apps/podcast-submit-operation/.idea/runConfigurations/Dockefile.xml
+++ /dev/null
@@ -1,17 +0,0 @@
-<component name="ProjectRunConfigurationManager">
-  <configuration default="false" name="Dockefile" type="docker-deploy" factoryName="dockerfile" server-name="mediacloud">
-    <deployment type="dockerfile">
-      <settings>
-        <option name="buildCliOptions" value="--cache-from gcr.io/mcback/podcast-submit-operation:latest" />
-        <option name="buildOnly" value="true" />
-        <option name="command" value="" />
-        <option name="containerName" value="" />
-        <option name="entrypoint" value="" />
-        <option name="imageTag" value="gcr.io/mcback/podcast-submit-operation:latest" />
-        <option name="commandLineOptions" value="" />
-        <option name="sourceFilePath" value="Dockerfile" />
-      </settings>
-    </deployment>
-    <method v="2" />
-  </configuration>
-</component>
\ No newline at end of file
diff --git a/apps/podcast-submit-operation/.idea/sqldialects.xml b/apps/podcast-submit-operation/.idea/sqldialects.xml
deleted file mode 100644
index 790b3f37f8..0000000000
--- a/apps/podcast-submit-operation/.idea/sqldialects.xml
+++ /dev/null
@@ -1,7 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="SqlDialectMappings">
-    <file url="file://$PROJECT_DIR$/../postgresql-server/schema/mediawords.sql" dialect="PostgreSQL" />
-    <file url="PROJECT" dialect="PostgreSQL" />
-  </component>
-</project>
\ No newline at end of file
diff --git a/apps/podcast-submit-operation/.idea/vcs.xml b/apps/podcast-submit-operation/.idea/vcs.xml
deleted file mode 100644
index b2bdec2d71..0000000000
--- a/apps/podcast-submit-operation/.idea/vcs.xml
+++ /dev/null
@@ -1,6 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="VcsDirectoryMappings">
-    <mapping directory="$PROJECT_DIR$/../.." vcs="Git" />
-  </component>
-</project>
\ No newline at end of file
diff --git a/apps/podcast-submit-operation/Dockerfile b/apps/podcast-submit-operation/Dockerfile
deleted file mode 100644
index acf06f32e4..0000000000
--- a/apps/podcast-submit-operation/Dockerfile
+++ /dev/null
@@ -1,26 +0,0 @@
-#
-# Submit a long running operation to Google Speech to Text API for it to transcribe the episode
-#
-
-FROM gcr.io/mcback/common:latest
-
-# Install Python dependencies
-COPY src/requirements.txt /var/tmp/
-RUN \
-    cd /var/tmp/ && \
-    pip3 install -r requirements.txt && \
-    rm requirements.txt && \
-    rm -rf /root/.cache/ && \
-    true
-
-# Copy sources
-COPY src/ /opt/mediacloud/src/podcast-submit-operation/
-ENV PERL5LIB="/opt/mediacloud/src/podcast-submit-operation/perl:${PERL5LIB}" \
-    PYTHONPATH="/opt/mediacloud/src/podcast-submit-operation/python:${PYTHONPATH}"
-
-# Copy worker script
-COPY bin /opt/mediacloud/bin
-
-USER mediacloud
-
-CMD ["podcast_submit_operation_worker.py"]
diff --git a/apps/podcast-submit-operation/bin/podcast_submit_operation_worker.py b/apps/podcast-submit-operation/bin/podcast_submit_operation_worker.py
deleted file mode 100755
index ae3b712e80..0000000000
--- a/apps/podcast-submit-operation/bin/podcast_submit_operation_worker.py
+++ /dev/null
@@ -1,75 +0,0 @@
-#!/usr/bin/env python3
-
-from mediawords.db import connect_to_db
-from mediawords.job import JobBroker
-from mediawords.util.log import create_logger
-from mediawords.util.perl import decode_object_from_bytes_if_needed
-from mediawords.util.process import fatal_error
-
-from podcast_submit_operation.exceptions import McPodcastSubmitOperationSoftException
-from podcast_submit_operation.submit_operation import get_podcast_episode, submit_transcribe_operation
-
-log = create_logger(__name__)
-
-ADD_TO_QUEUE_AT_DURATION_MULTIPLIER = 1.1
-"""
-How soon to expect the transcription results to become available in relation to episode's duration.
-
-For example, if the episode's duration is 60 minutes, and the multiplier is 1.1, the transcription results fetch will
-first be attempted after 60 * 1.1 = 66 minutes.
-"""
-
-
-def run_podcast_submit_operation(stories_id: int) -> None:
-    """Submit a podcast episode to the Speech API."""
-
-    if isinstance(stories_id, bytes):
-        stories_id = decode_object_from_bytes_if_needed(stories_id)
-    stories_id = int(stories_id)
-
-    db = connect_to_db()
-
-    log.info(f"Submitting story's {stories_id} podcast episode for transcription...")
-
-    try:
-        episode = get_podcast_episode(db=db, stories_id=stories_id)
-        speech_operation_id = submit_transcribe_operation(episode=episode)
-
-        db.query("""
-            UPDATE podcast_episodes
-            SET speech_operation_id = %(speech_operation_id)s
-            WHERE podcast_episodes_id = %(podcast_episodes_id)s
-        """, {
-            'podcast_episodes_id': episode.podcast_episodes_id,
-            'speech_operation_id': speech_operation_id,
-        })
-
-        add_to_queue_interval = f"{int(episode.duration + ADD_TO_QUEUE_AT_DURATION_MULTIPLIER)} seconds"
-        db.query("""
-            INSERT INTO podcast_episode_transcript_fetches (
-                podcast_episodes_id,
-                add_to_queue_at
-            ) VALUES (
-                %(podcast_episodes_id)s,
-                NOW() + INTERVAL %(add_to_queue_interval)s
-            )
-        """, {
-            'podcast_episodes_id': episode.podcast_episodes_id,
-            'add_to_queue_interval': add_to_queue_interval,
-        })
-
-    except McPodcastSubmitOperationSoftException as ex:
-        # Soft exceptions
-        log.error(f"Unable to submit podcast episode for story {stories_id}: {ex}")
-        raise ex
-
-    except Exception as ex:
-        # Hard and other exceptions
-        fatal_error(f"Fatal / unknown error while submitting podcast episode for story {stories_id}: {ex}")
-
-    log.info(f"Done submitting story's {stories_id} podcast episode for transcription")
-
-
-if __name__ == '__main__':
-    app = JobBroker(queue_name='MediaWords::Job::Podcast::SubmitOperation')
-    app.start_worker(handler=run_podcast_submit_operation)
diff --git a/apps/podcast-submit-operation/docker-compose.tests.yml b/apps/podcast-submit-operation/docker-compose.tests.yml
deleted file mode 100644
index 349eaa1c3b..0000000000
--- a/apps/podcast-submit-operation/docker-compose.tests.yml
+++ /dev/null
@@ -1,56 +0,0 @@
-version: "3.7"
-
-services:
-
-    podcast-submit-operation:
-        image: gcr.io/mcback/podcast-submit-operation:latest
-        init: true
-        stop_signal: SIGKILL
-        environment:
-            MC_PODCAST_GC_AUTH_JSON_BASE64: "${MC_PODCAST_GC_AUTH_JSON_BASE64}"
-        volumes:
-            - type: bind
-              source: ./bin/
-              target: /opt/mediacloud/bin/
-            - type: bind
-              source: ./src/
-              target: /opt/mediacloud/src/podcast-submit-operation/
-            - type: bind
-              source: ./tests/
-              target: /opt/mediacloud/tests/
-            - type: bind
-              source: ./../common/src/
-              target: /opt/mediacloud/src/common/
-        depends_on:
-            - postgresql-pgbouncer
-            # We don't need "rabbitmq-server" to run tests
-
-    postgresql-pgbouncer:
-        image: gcr.io/mcback/postgresql-pgbouncer:latest
-        init: true
-        stop_signal: SIGKILL
-        expose:
-            - 6432
-        volumes:
-            - type: bind
-              source: ./../postgresql-pgbouncer/conf/
-              target: /etc/pgbouncer/
-        depends_on:
-            - postgresql-server
-
-    postgresql-server:
-        image: gcr.io/mcback/postgresql-server:latest
-        init: true
-        stop_signal: SIGKILL
-        expose:
-            - 5432
-        volumes:
-            - type: bind
-              source: ./../postgresql-server/bin/
-              target: /opt/mediacloud/bin/
-            - type: bind
-              source: ./../postgresql-server/schema/
-              target: /opt/mediacloud/schema/
-            - type: bind
-              source: ./../postgresql-server/conf/
-              target: /etc/postgresql/11/main/
diff --git a/apps/podcast-submit-operation/src/python/podcast_submit_operation/config.py b/apps/podcast-submit-operation/src/python/podcast_submit_operation/config.py
deleted file mode 100644
index 0d20337c98..0000000000
--- a/apps/podcast-submit-operation/src/python/podcast_submit_operation/config.py
+++ /dev/null
@@ -1,12 +0,0 @@
-from mediawords.util.config import file_with_env_value
-
-
-class PodcastSubmitOperationConfig(object):
-    """
-    Podcast submit transcription operation configuration.
-    """
-
-    @staticmethod
-    def gc_auth_json_file() -> str:
-        """Return path to Google Cloud authentication JSON file."""
-        return file_with_env_value(name='MC_PODCAST_GC_AUTH_JSON_BASE64', encoded_with_base64=True)
diff --git a/apps/podcast-submit-operation/src/python/podcast_submit_operation/exceptions.py b/apps/podcast-submit-operation/src/python/podcast_submit_operation/exceptions.py
deleted file mode 100644
index 2ed79eb39a..0000000000
--- a/apps/podcast-submit-operation/src/python/podcast_submit_operation/exceptions.py
+++ /dev/null
@@ -1,54 +0,0 @@
-import abc
-
-
-class _AbstractMcPodcastSubmitOperationException(Exception, metaclass=abc.ABCMeta):
-    """Abstract exception."""
-    pass
-
-
-class McPodcastSubmitOperationSoftException(_AbstractMcPodcastSubmitOperationException):
-    """Soft errors exception."""
-    pass
-
-
-class McPodcastNoEpisodesException(McPodcastSubmitOperationSoftException):
-    """Exception thrown when there are no episodes for a story."""
-    pass
-
-
-class McPodcastEpisodeTooLongException(McPodcastSubmitOperationSoftException):
-    """Exception raised when podcast's episode is too long."""
-    pass
-
-
-# ---
-
-class McPodcastSubmitOperationHardException(_AbstractMcPodcastSubmitOperationException):
-    """Hard errors exception."""
-    pass
-
-
-class McPodcastDatabaseErrorException(McPodcastSubmitOperationHardException):
-    """Exception thrown on database errors."""
-    pass
-
-
-class McPodcastInvalidInputException(McPodcastSubmitOperationHardException):
-    """Exception thrown on invalid inputs."""
-    pass
-
-
-class McPodcastMisconfiguredSpeechAPIException(McPodcastSubmitOperationHardException):
-    """Exception thrown on misconfigured Google Speech API."""
-    pass
-
-
-class McPodcastSpeechAPIRequestFailedException(McPodcastSubmitOperationHardException):
-    """
-    Exception that is thrown when we're unable to submit a new job to Speech API.
-
-    This is a hard exception because we should be able to handle "soft" failures (e.g. temporary network errors) of
-    Speech API in the code, and on any other, previously unseen, problems (service downtime, ran out of money, blocked,
-    outdated API version, etc.) it's better just to shut down the worker
-    """
-    pass
diff --git a/apps/podcast-submit-operation/src/python/podcast_submit_operation/submit_operation.py b/apps/podcast-submit-operation/src/python/podcast_submit_operation/submit_operation.py
deleted file mode 100644
index 32e754ad97..0000000000
--- a/apps/podcast-submit-operation/src/python/podcast_submit_operation/submit_operation.py
+++ /dev/null
@@ -1,216 +0,0 @@
-import time
-from typing import Dict, Any
-
-# noinspection PyPackageRequirements
-from google.api_core.exceptions import ServiceUnavailable
-# noinspection PyPackageRequirements
-from google.cloud.speech_v1p1beta1 import SpeechClient, RecognitionConfig
-
-from mediawords.db import DatabaseHandler
-from mediawords.util.log import create_logger
-
-from podcast_submit_operation.config import PodcastSubmitOperationConfig
-from podcast_submit_operation.exceptions import (
-    McPodcastNoEpisodesException,
-    McPodcastDatabaseErrorException,
-    McPodcastInvalidInputException,
-    McPodcastMisconfiguredSpeechAPIException,
-    McPodcastEpisodeTooLongException,
-    McPodcastSpeechAPIRequestFailedException,
-)
-
-log = create_logger(__name__)
-
-MAX_DURATION = 60 * 60 * 2
-"""Max. podcast episode duration (in seconds) to submit for transcription."""
-
-MAX_RETRIES = 10
-"""Max. number of retries for submitting a Speech API long running operation."""
-
-DELAY_BETWEEN_RETRIES = 5
-"""How long to wait (in seconds) between retries."""
-
-
-class PodcastEpisode(object):
-    """
-    Podcast episode object.
-
-    Postprocesses database row from "podcast_episodes" and does some extra checks.
-    """
-    __slots__ = [
-        '__stories_id',
-        '__podcast_episodes_id',
-        '__gcs_uri',
-        '__duration',
-        '__codec',
-        '__sample_rate',
-        '__bcp47_language_code',
-    ]
-
-    def __init__(self, stories_id: int, db_row: Dict[str, Any]):
-        self.__stories_id = stories_id
-        self.__podcast_episodes_id = db_row['podcast_episodes_id']
-        self.__gcs_uri = db_row['gcs_uri']
-        self.__duration = db_row['duration']
-        self.__codec = db_row['codec']
-        self.__sample_rate = db_row['sample_rate']
-        self.__bcp47_language_code = db_row['bcp47_language_code']
-
-    @property
-    def stories_id(self) -> int:
-        return self.__stories_id
-
-    @property
-    def podcast_episodes_id(self) -> int:
-        return self.__podcast_episodes_id
-
-    @property
-    def gcs_uri(self) -> str:
-        if not self.__gcs_uri.startswith('gs://'):
-            raise McPodcastInvalidInputException("Google Cloud Storage URI doesn't have gs:// prefix.")
-        return self.__gcs_uri
-
-    @property
-    def duration(self) -> int:
-        if not self.__duration:
-            raise McPodcastInvalidInputException("Duration is unset or zero.")
-        return self.__duration
-
-    @property
-    def codec(self) -> RecognitionConfig.AudioEncoding:
-        try:
-            encoding_obj = getattr(RecognitionConfig.AudioEncoding, self.__codec)
-        except Exception as ex:
-            raise McPodcastInvalidInputException(f"Invalid codec '{self.__codec}': {ex}")
-
-        return encoding_obj
-
-    @property
-    def sample_rate(self) -> int:
-        if not self.__sample_rate:
-            raise McPodcastInvalidInputException("Sample rate is unset or zero.")
-        return self.__sample_rate
-
-    @property
-    def bcp47_language_code(self) -> str:
-        if '-' not in self.__bcp47_language_code and self.__bcp47_language_code != 'zh':
-            raise McPodcastInvalidInputException(f"Invalid BCP 47 language code '{self.__bcp47_language_code}'.")
-        return self.__bcp47_language_code
-
-
-def get_podcast_episode(db: DatabaseHandler, stories_id: int) -> PodcastEpisode:
-    """
-    Get podcast episode object for story ID.
-
-    :param db: Database handler.
-    :param stories_id: Story ID.
-    :return: Podcast episode object.
-    """
-    try:
-        podcast_episodes = db.select(
-            table='podcast_episodes',
-            what_to_select='*',
-            condition_hash={'stories_id': stories_id},
-        ).hashes()
-
-    except Exception as ex:
-        raise McPodcastDatabaseErrorException(f"Unable to fetch story's {stories_id} podcast episodes: {ex}")
-
-    if not podcast_episodes:
-        raise McPodcastNoEpisodesException(f"There are no podcast episodes for story {stories_id}")
-
-    if len(podcast_episodes) > 1:
-        # That's very weird, there should be only one episode per story
-        raise McPodcastDatabaseErrorException(f"There's more than one podcast episode for story {stories_id}")
-
-    try:
-        episode = PodcastEpisode(stories_id=stories_id, db_row=podcast_episodes[0])
-    except Exception as ex:
-        raise McPodcastInvalidInputException(f"Invalid episode for story {stories_id}: {ex}")
-
-    if episode.duration > MAX_DURATION:
-        raise McPodcastEpisodeTooLongException(
-            f"Story's {stories_id} podcast episode is too long ({episode.duration} seconds)."
-        )
-
-    return episode
-
-
-def submit_transcribe_operation(episode: PodcastEpisode) -> int:
-    """
-    Submit a Speech API long running operation to transcribe a podcast episode.
-
-    :param episode: Podcast episode object.
-    :return Operation's ID to use for fetching operation results.
-    """
-
-    try:
-        config = PodcastSubmitOperationConfig()
-        client = SpeechClient.from_service_account_json(config.gc_auth_json_file())
-    except Exception as ex:
-        raise McPodcastMisconfiguredSpeechAPIException(f"Unable to create Speech API client: {ex}")
-
-    try:
-        config = RecognitionConfig(
-            encoding=episode.codec,
-            sample_rate_hertz=episode.sample_rate,
-            # We always set the channel count to 1 and disable separate recognition per channel as our inputs are all
-            # mono audio files and do not have separate speakers per audio channel.
-            audio_channel_count=1,
-            enable_separate_recognition_per_channel=False,
-            language_code=episode.bcp47_language_code,
-            alternative_language_codes=[
-                # FIXME add all Chinese variants
-                # FIXME add Mexican Spanish variants
-            ],
-
-            speech_contexts=[
-                # Speech API works pretty well without custom contexts
-            ],
-            # Don't care that much about word confidence
-            enable_word_confidence=False,
-            # Punctuation doesn't work that well but we still enable it here
-            enable_automatic_punctuation=True,
-            # Not setting 'model' as 'use_enhanced' will then choose the best model for us
-            # Using enhanced (more expensive) model, where available
-            use_enhanced=True,
-        )
-    except Exception as ex:
-        raise McPodcastMisconfiguredSpeechAPIException(f"Unable to initialize Speech API configuration: {ex}")
-
-    log.info(f"Submitting a Speech API operation for story {episode.stories_id}...")
-    speech_operation = None
-    for attempt in range(1, MAX_RETRIES + 1):
-
-        if attempt > 1:
-            log.warning(f"Waiting for {DELAY_BETWEEN_RETRIES} seconds and retrying #{attempt}...")
-            time.sleep(DELAY_BETWEEN_RETRIES)
-
-        try:
-            speech_operation = client.long_running_recognize(config=config, audio={"uri": episode.gcs_uri})
-        except ServiceUnavailable as ex:
-            # Speech API sometimes throws:
-            #
-            #   google.api_core.exceptions.ServiceUnavailable: 503 failed to connect to all addresses
-            #
-            log.error(f"Unable to submit an operation because service is unavailable: {ex}")
-        except Exception as ex:
-            raise McPodcastSpeechAPIRequestFailedException(f"Unable to submit a Speech API operation: {ex}")
-        else:
-            break
-
-    if not speech_operation:
-        raise McPodcastSpeechAPIRequestFailedException(f"Ran out of retries while submitting Speech API operation.")
-
-    try:
-        # We get the operation name in a try-except block because accessing it is not that well documented, so Google
-        # might change the property names whenever they please and we wouldn't necessarily notice otherwise
-        operation_id = speech_operation.operation.name
-        if not operation_id:
-            raise McPodcastMisconfiguredSpeechAPIException(f"Operation name is empty.")
-    except Exception as ex:
-        raise McPodcastMisconfiguredSpeechAPIException(f"Unable to get operation name: {ex}")
-
-    log.info(f"Submitted Speech API operation '{operation_id}' for story {episode.stories_id}")
-
-    return operation_id
diff --git a/apps/podcast-submit-operation/src/requirements.txt b/apps/podcast-submit-operation/src/requirements.txt
deleted file mode 100644
index 59e80a7b73..0000000000
--- a/apps/podcast-submit-operation/src/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
-google-cloud-speech==2.0.1
diff --git a/apps/podcast-submit-operation/tests/python/test_submit_operation.py b/apps/podcast-submit-operation/tests/python/test_submit_operation.py
deleted file mode 100644
index 013037530c..0000000000
--- a/apps/podcast-submit-operation/tests/python/test_submit_operation.py
+++ /dev/null
@@ -1,40 +0,0 @@
-from mediawords.db import connect_to_db
-from mediawords.test.db.create import create_test_medium, create_test_feed, create_test_story
-
-from podcast_submit_operation.submit_operation import get_podcast_episode, submit_transcribe_operation
-
-
-def test_submit_transcribe_operation():
-    test_gcs_uri = "gs://mc-podcast-sample-audio-files/samples/kim_kardashian-mp3.mp3"
-
-    db = connect_to_db()
-    test_medium = create_test_medium(db=db, label='test')
-    test_feed = create_test_feed(db=db, label='test', medium=test_medium)
-    story = create_test_story(db=db, label='test', feed=test_feed)
-
-    stories_id = story['stories_id']
-
-    enclosure = db.insert(table='story_enclosures', insert_hash={
-        'stories_id': stories_id,
-        # URL doesn't really matter as we won't be fetching it
-        'url': 'http://example.com/',
-        'mime_type': 'audio/mpeg',
-        'length': 100000,
-    })
-
-    db.insert(table='podcast_episodes', insert_hash={
-        'stories_id': stories_id,
-        'story_enclosures_id': enclosure['story_enclosures_id'],
-        'gcs_uri': test_gcs_uri,
-
-        # We lie about the duration because we want to test whether 'add_to_queue_at' will be set way into the future
-        'duration': 60 * 60,
-
-        'codec': 'MP3',
-        'sample_rate': 44100,
-        'bcp47_language_code': 'en-US',
-    })
-
-    episode = get_podcast_episode(db=db, stories_id=stories_id)
-    speech_operation_id = submit_transcribe_operation(episode=episode)
-    assert speech_operation_id
diff --git a/apps/podcast-fetch-transcript/.dockerignore b/apps/podcast-transcribe-episode/.dockerignore
similarity index 100%
rename from apps/podcast-fetch-transcript/.dockerignore
rename to apps/podcast-transcribe-episode/.dockerignore
diff --git a/apps/podcast-transcribe-episode/.idea/.gitignore b/apps/podcast-transcribe-episode/.idea/.gitignore
new file mode 100644
index 0000000000..73f69e0958
--- /dev/null
+++ b/apps/podcast-transcribe-episode/.idea/.gitignore
@@ -0,0 +1,8 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
+# Editor-based HTTP Client requests
+/httpRequests/
diff --git a/apps/podcast-fetch-episode/.idea/externalDependencies.xml b/apps/podcast-transcribe-episode/.idea/externalDependencies.xml
similarity index 100%
rename from apps/podcast-fetch-episode/.idea/externalDependencies.xml
rename to apps/podcast-transcribe-episode/.idea/externalDependencies.xml
diff --git a/apps/podcast-transcribe-episode/.idea/inspectionProfiles/Project_Default.xml b/apps/podcast-transcribe-episode/.idea/inspectionProfiles/Project_Default.xml
new file mode 100644
index 0000000000..fe9d3b7548
--- /dev/null
+++ b/apps/podcast-transcribe-episode/.idea/inspectionProfiles/Project_Default.xml
@@ -0,0 +1,15 @@
+<component name="InspectionProjectProfileManager">
+  <profile version="1.0">
+    <option name="myName" value="Project Default" />
+    <inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
+      <option name="ignoredPackages">
+        <value>
+          <list size="2">
+            <item index="0" class="java.lang.String" itemvalue="cadence" />
+            <item index="1" class="java.lang.String" itemvalue="grpc" />
+          </list>
+        </value>
+      </option>
+    </inspection_tool>
+  </profile>
+</component>
\ No newline at end of file
diff --git a/apps/podcast-fetch-episode/.idea/inspectionProfiles/profiles_settings.xml b/apps/podcast-transcribe-episode/.idea/inspectionProfiles/profiles_settings.xml
similarity index 100%
rename from apps/podcast-fetch-episode/.idea/inspectionProfiles/profiles_settings.xml
rename to apps/podcast-transcribe-episode/.idea/inspectionProfiles/profiles_settings.xml
diff --git a/apps/podcast-transcribe-episode/.idea/mediawords.sql b/apps/podcast-transcribe-episode/.idea/mediawords.sql
new file mode 120000
index 0000000000..08fc9a64b9
--- /dev/null
+++ b/apps/podcast-transcribe-episode/.idea/mediawords.sql
@@ -0,0 +1 @@
+../../postgresql-server/schema/mediawords.sql
\ No newline at end of file
diff --git a/apps/podcast-transcribe-episode/.idea/misc.xml b/apps/podcast-transcribe-episode/.idea/misc.xml
new file mode 100644
index 0000000000..d89177f747
--- /dev/null
+++ b/apps/podcast-transcribe-episode/.idea/misc.xml
@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.8.5 Docker Compose (podcast-transcribe-episode at [/home/pypt/m/apps/podcast-transcribe-episode/docker-compose.tests.yml])" project-jdk-type="Python SDK" />
+</project>
\ No newline at end of file
diff --git a/apps/podcast-submit-operation/.idea/modules.xml b/apps/podcast-transcribe-episode/.idea/modules.xml
similarity index 51%
rename from apps/podcast-submit-operation/.idea/modules.xml
rename to apps/podcast-transcribe-episode/.idea/modules.xml
index 26bb21f27e..9023537213 100644
--- a/apps/podcast-submit-operation/.idea/modules.xml
+++ b/apps/podcast-transcribe-episode/.idea/modules.xml
@@ -2,7 +2,7 @@
 <project version="4">
   <component name="ProjectModuleManager">
     <modules>
-      <module fileurl="file://$PROJECT_DIR$/.idea/podcast-submit-operation.iml" filepath="$PROJECT_DIR$/.idea/podcast-submit-operation.iml" />
+      <module fileurl="file://$PROJECT_DIR$/.idea/podcast-transcribe-episode.iml" filepath="$PROJECT_DIR$/.idea/podcast-transcribe-episode.iml" />
     </modules>
   </component>
 </project>
\ No newline at end of file
diff --git a/apps/podcast-fetch-transcript/.idea/podcast-fetch-transcript.iml b/apps/podcast-transcribe-episode/.idea/podcast-transcribe-episode.iml
similarity index 80%
rename from apps/podcast-fetch-transcript/.idea/podcast-fetch-transcript.iml
rename to apps/podcast-transcribe-episode/.idea/podcast-transcribe-episode.iml
index ffc8ff3cc9..16f0d9a079 100644
--- a/apps/podcast-fetch-transcript/.idea/podcast-fetch-transcript.iml
+++ b/apps/podcast-transcribe-episode/.idea/podcast-transcribe-episode.iml
@@ -2,7 +2,7 @@
 <module type="PYTHON_MODULE" version="4">
   <component name="NewModuleRootManager">
     <content url="file://$MODULE_DIR$" />
-    <orderEntry type="jdk" jdkName="Remote Python 3.8.5 Docker Compose (podcast-fetch-transcript at [/home/pypt/m/apps/podcast-fetch-transcript/docker-compose.tests.yml])" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Remote Python 3.8.5 Docker Compose (podcast-transcribe-episode at [/home/pypt/m/apps/podcast-transcribe-episode/docker-compose.tests.yml])" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PackageRequirementsSettings">
diff --git a/apps/podcast-fetch-episode/.idea/runConfigurations/Dockerfile.xml b/apps/podcast-transcribe-episode/.idea/runConfigurations/Dockerfile.xml
similarity index 83%
rename from apps/podcast-fetch-episode/.idea/runConfigurations/Dockerfile.xml
rename to apps/podcast-transcribe-episode/.idea/runConfigurations/Dockerfile.xml
index 83b1a58573..85f79e0693 100644
--- a/apps/podcast-fetch-episode/.idea/runConfigurations/Dockerfile.xml
+++ b/apps/podcast-transcribe-episode/.idea/runConfigurations/Dockerfile.xml
@@ -2,12 +2,12 @@
   <configuration default="false" name="Dockerfile" type="docker-deploy" factoryName="dockerfile" server-name="mediacloud">
     <deployment type="dockerfile">
       <settings>
-        <option name="buildCliOptions" value="--cache-from gcr.io/mcback/podcast-fetch-episode:latest" />
+        <option name="imageTag" value="gcr.io/mcback/podcast-transcribe-episode:latest" />
+        <option name="buildCliOptions" value="--cache-from gcr.io/mcback/podcast-transcribe-episode:latest" />
         <option name="buildOnly" value="true" />
         <option name="command" value="" />
         <option name="containerName" value="" />
         <option name="entrypoint" value="" />
-        <option name="imageTag" value="gcr.io/mcback/podcast-fetch-episode:latest" />
         <option name="commandLineOptions" value="" />
         <option name="sourceFilePath" value="Dockerfile" />
       </settings>
diff --git a/apps/podcast-transcribe-episode/.idea/sqlDataSources.xml b/apps/podcast-transcribe-episode/.idea/sqlDataSources.xml
new file mode 100644
index 0000000000..688da92e7e
--- /dev/null
+++ b/apps/podcast-transcribe-episode/.idea/sqlDataSources.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="SqlDataSourceStorage">
+    <option name="dataSources">
+      <list>
+        <State>
+          <option name="id" value="c523b731-cf4c-4c28-92b5-fc746b47430e" />
+          <option name="name" value="DDL data source" />
+          <option name="urls">
+            <array>
+              <option value="file://$PROJECT_DIR$/.idea/mediawords.sql" />
+            </array>
+          </option>
+        </State>
+      </list>
+    </option>
+  </component>
+</project>
\ No newline at end of file
diff --git a/apps/podcast-poll-due-operations/.idea/sqldialects.xml b/apps/podcast-transcribe-episode/.idea/sqldialects.xml
similarity index 62%
rename from apps/podcast-poll-due-operations/.idea/sqldialects.xml
rename to apps/podcast-transcribe-episode/.idea/sqldialects.xml
index 790b3f37f8..f8c2c59528 100644
--- a/apps/podcast-poll-due-operations/.idea/sqldialects.xml
+++ b/apps/podcast-transcribe-episode/.idea/sqldialects.xml
@@ -1,7 +1,7 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
   <component name="SqlDialectMappings">
-    <file url="file://$PROJECT_DIR$/../postgresql-server/schema/mediawords.sql" dialect="PostgreSQL" />
+    <file url="file://$PROJECT_DIR$/.idea/mediawords.sql" dialect="PostgreSQL" />
     <file url="PROJECT" dialect="PostgreSQL" />
   </component>
 </project>
\ No newline at end of file
diff --git a/apps/podcast-fetch-episode/.idea/vcs.xml b/apps/podcast-transcribe-episode/.idea/vcs.xml
similarity index 100%
rename from apps/podcast-fetch-episode/.idea/vcs.xml
rename to apps/podcast-transcribe-episode/.idea/vcs.xml
diff --git a/apps/podcast-transcribe-episode/Dockerfile b/apps/podcast-transcribe-episode/Dockerfile
new file mode 100644
index 0000000000..9cec878422
--- /dev/null
+++ b/apps/podcast-transcribe-episode/Dockerfile
@@ -0,0 +1,36 @@
+#
+# Fetch podcast episode, convert it (if needed), transcribe and store to the database
+#
+
+FROM gcr.io/mcback/common:latest
+
+# Install FFmpeg for manipulating audio files
+RUN apt-get -y --no-install-recommends install ffmpeg
+
+# Install Python dependencies
+COPY src/requirements.txt /var/tmp/
+RUN \
+    cd /var/tmp/ && \
+    pip3 install -r requirements.txt && \
+    rm requirements.txt && \
+    rm -rf /root/.cache/ && \
+    true
+
+# Copy sources
+COPY src/ /opt/mediacloud/src/podcast-transcribe-episode/
+ENV PERL5LIB="/opt/mediacloud/src/podcast-transcribe-episode/perl:${PERL5LIB}" \
+    PYTHONPATH="/opt/mediacloud/src/podcast-transcribe-episode/python:${PYTHONPATH}"
+
+# Copy worker script
+COPY bin /opt/mediacloud/bin
+
+USER mediacloud
+
+# Set a failing CMD because we'll be using the same image to run:
+#
+# * "rabbitmq_worker.py" - processes Celery jobs, starts Temporal workflows for those;
+# * "workflow_worker.py" - runs Temporal workflows.
+#
+# so the user is expected to set "command" in docker-compose.yml to run a specific worker.
+#
+CMD ["SET_CONTAINER_COMMAND_TO_ONE_OF_THE_WORKERS"]
diff --git a/apps/podcast-transcribe-episode/README.md b/apps/podcast-transcribe-episode/README.md
new file mode 100644
index 0000000000..a975e3f612
--- /dev/null
+++ b/apps/podcast-transcribe-episode/README.md
@@ -0,0 +1,18 @@
+# Podcast transcription
+
+## TODO
+
+* [Upload transcriptions directly to GCS](https://cloud.google.com/speech-to-text/docs/async-recognize#speech_transcribe_async_gcs-python)
+  once that's no longer a demo feature
+* Add all Chinese variants to `alternative_language_codes`
+* Add all Mexican Spanish variants to `alternative_language_codes`
+* Post-init [validation of dataclasses](https://docs.python.org/3/library/dataclasses.html#post-init-processing)
+* When operation ID can't be found, resubmit the podcast for transcription as that might mean that the operation results
+  weren't fetched in time and so the operation has expired
+* Add heartbeats to transcoding activity
+* Test running the same activity multiple times
+* If an activity throws an exception, its message should get printed out to the console as well (in addition to
+  Temporal's log)
+* Track failed workflows / activities in Munin
+* Instead of (or in addition to) setting `workflow_run_timeout` in `test_workflow.py`, limit retries of the individual
+  activities too so that when they fail, we'd get a nice error message printed to the test log
diff --git a/apps/podcast-transcribe-episode/bin/rabbitmq_worker.py b/apps/podcast-transcribe-episode/bin/rabbitmq_worker.py
new file mode 100755
index 0000000000..52f6863072
--- /dev/null
+++ b/apps/podcast-transcribe-episode/bin/rabbitmq_worker.py
@@ -0,0 +1,43 @@
+#!/usr/bin/env python3
+
+import asyncio
+
+from mediawords.job import JobBroker
+from mediawords.util.log import create_logger
+from mediawords.util.perl import decode_object_from_bytes_if_needed
+from mediawords.workflow.client import workflow_client
+
+# noinspection PyPackageRequirements
+from temporal.workflow import WorkflowClient, WorkflowOptions
+
+from podcast_transcribe_episode.workflow_interface import PodcastTranscribeWorkflow
+
+log = create_logger(__name__)
+
+
+async def _start_workflow(stories_id: int) -> None:
+    log.info(f"Starting a workflow for story {stories_id}...")
+
+    client = workflow_client()
+    workflow: PodcastTranscribeWorkflow = client.new_workflow_stub(
+        cls=PodcastTranscribeWorkflow,
+        workflow_options=WorkflowOptions(workflow_id=str(stories_id)),
+    )
+
+    # Fire and forget as the workflow will do everything (including adding an extraction job) itself
+    await WorkflowClient.start(workflow.transcribe_episode, stories_id)
+
+    log.info(f"Started a workflow for story {stories_id}...")
+
+
+def run_podcast_fetch_episode(stories_id: int) -> None:
+    if isinstance(stories_id, bytes):
+        stories_id = decode_object_from_bytes_if_needed(stories_id)
+    stories_id = int(stories_id)
+
+    asyncio.run(_start_workflow(stories_id=stories_id))
+
+
+if __name__ == '__main__':
+    app = JobBroker(queue_name='MediaWords::Job::Podcast::TranscribeEpisode')
+    app.start_worker(handler=run_podcast_fetch_episode)
diff --git a/apps/podcast-transcribe-episode/bin/workflow_worker.py b/apps/podcast-transcribe-episode/bin/workflow_worker.py
new file mode 100755
index 0000000000..6859753a8e
--- /dev/null
+++ b/apps/podcast-transcribe-episode/bin/workflow_worker.py
@@ -0,0 +1,32 @@
+#!/usr/bin/env python3
+
+import asyncio
+
+# noinspection PyPackageRequirements
+from temporal.workerfactory import WorkerFactory
+
+from mediawords.util.log import create_logger
+from mediawords.workflow.client import workflow_client
+
+from podcast_transcribe_episode.workflow import PodcastTranscribeWorkflowImpl, PodcastTranscribeActivitiesImpl
+from podcast_transcribe_episode.workflow_interface import TASK_QUEUE, PodcastTranscribeActivities
+
+log = create_logger(__name__)
+
+
+async def _start_worker():
+    client = workflow_client()
+    factory = WorkerFactory(client=client, namespace=client.namespace)
+    worker = factory.new_worker(task_queue=TASK_QUEUE)
+    worker.register_activities_implementation(
+        activities_instance=PodcastTranscribeActivitiesImpl(),
+        activities_cls_name=PodcastTranscribeActivities.__name__,
+    )
+    worker.register_workflow_implementation_type(impl_cls=PodcastTranscribeWorkflowImpl)
+    factory.start()
+
+
+if __name__ == '__main__':
+    loop = asyncio.get_event_loop()
+    asyncio.ensure_future(_start_worker())
+    loop.run_forever()
diff --git a/apps/podcast-transcribe-episode/docker-compose.tests.yml b/apps/podcast-transcribe-episode/docker-compose.tests.yml
new file mode 100644
index 0000000000..dff828c7c0
--- /dev/null
+++ b/apps/podcast-transcribe-episode/docker-compose.tests.yml
@@ -0,0 +1,148 @@
+version: "3.7"
+
+services:
+
+    podcast-transcribe-episode:
+        image: gcr.io/mcback/podcast-transcribe-episode:latest
+        init: true
+        stop_signal: SIGKILL
+        environment:
+            MC_PODCAST_AUTH_JSON_BASE64: "${MC_PODCAST_AUTH_JSON_BASE64}"
+            MC_PODCAST_RAW_ENCLOSURES_BUCKET_NAME: "${MC_PODCAST_RAW_ENCLOSURES_BUCKET_NAME}"
+            MC_PODCAST_TRANSCODED_EPISODES_BUCKET_NAME: "${MC_PODCAST_TRANSCODED_EPISODES_BUCKET_NAME}"
+            MC_PODCAST_TRANSCRIPTS_BUCKET_NAME: "${MC_PODCAST_TRANSCRIPTS_BUCKET_NAME}"
+            # Dev/test environments don't use path prefixes:
+            #
+            # * MC_PODCAST_RAW_ENCLOSURES_PATH_PREFIX
+            # * MC_PODCAST_TRANSCODED_EPISODES_PATH_PREFIX
+            # * MC_PODCAST_TRANSCRIPTS_PATH_PREFIX
+            #
+            # as they create a different, timestamped prefix for every test run.
+
+        volumes:
+            - type: bind
+              source: ./bin/
+              target: /opt/mediacloud/bin/
+            - type: bind
+              source: ./src/
+              target: /opt/mediacloud/src/podcast-transcribe-episode/
+            - type: bind
+              source: ./tests/
+              target: /opt/mediacloud/tests/
+            - type: bind
+              source: ./../common/src/
+              target: /opt/mediacloud/src/common/
+        depends_on:
+            - postgresql-pgbouncer
+            - rabbitmq-server
+            - temporal-server
+
+            # Not needed for running the test but useful for debugging, demos
+            # and such
+            - temporal-webapp
+
+    postgresql-pgbouncer:
+        image: gcr.io/mcback/postgresql-pgbouncer:latest
+        init: true
+        stop_signal: SIGKILL
+        expose:
+            - "6432"
+        volumes:
+            - type: bind
+              source: ./../postgresql-pgbouncer/conf/
+              target: /etc/pgbouncer/
+        depends_on:
+            - postgresql-server
+
+    postgresql-server:
+        image: gcr.io/mcback/postgresql-server:latest
+        init: true
+        stop_signal: SIGKILL
+        expose:
+            - "5432"
+        volumes:
+            - type: bind
+              source: ./../postgresql-server/bin/
+              target: /opt/mediacloud/bin/
+            - type: bind
+              source: ./../postgresql-server/schema/
+              target: /opt/mediacloud/schema/
+            - type: bind
+              source: ./../postgresql-base/conf/
+              target: /etc/postgresql/13/main/
+
+    rabbitmq-server:
+        image: gcr.io/mcback/rabbitmq-server:latest
+        init: true
+        stop_signal: SIGKILL
+        expose:
+            - "5672"
+            - "15672"
+        volumes:
+            - type: bind
+              source: ./../rabbitmq-server/conf/
+              target: /etc/rabbitmq/
+
+    temporal-server:
+        image: gcr.io/mcback/temporal-server:latest
+        init: true
+        stop_signal: SIGKILL
+        depends_on:
+            - temporal-postgresql
+            - temporal-elasticsearch
+        expose:
+            - "6933"
+            - "6934"
+            - "6935"
+            - "6939"
+            - "7233"
+            - "7234"
+            - "7235"
+            - "7239"
+        volumes:
+            - type: bind
+              source: ./../temporal-server/bin/
+              target: /opt/temporal-server/bin/
+            - type: bind
+              source: ./../temporal-server/config/dynamicconfig.yaml
+              target: /opt/temporal-server/config/dynamicconfig.yaml
+            - type: bind
+              source: ./../temporal-server/config/mediacloud_template.yaml
+              target: /opt/temporal-server/config/mediacloud_template.yaml
+
+    temporal-postgresql:
+        image: gcr.io/mcback/temporal-postgresql:latest
+        init: true
+        stop_signal: SIGKILL
+        expose:
+            - "5432"
+        volumes:
+            - type: bind
+              source: ./../temporal-postgresql/bin/
+              target: /opt/temporal-postgresql/bin/
+            - type: bind
+              source: ./../postgresql-base/conf/
+              target: /etc/postgresql/13/main/
+
+    temporal-elasticsearch:
+        image: gcr.io/mcback/temporal-elasticsearch:latest
+        init: true
+        stop_signal: SIGKILL
+        expose:
+            - "9200"
+            - "9300"
+        volumes:
+            - type: bind
+              source: ./../elasticsearch-base/bin/elasticsearch.sh
+              target: /opt/elasticsearch/bin/elasticsearch.sh
+            # Not mounting config as it gets concatenated into a single file
+
+    temporal-webapp:
+        image: gcr.io/mcback/temporal-webapp:latest
+        init: true
+        stop_signal: SIGKILL
+        expose:
+            - "8088"
+        ports:
+            # Expose to host for debugging
+            - "8088:8088"
diff --git a/apps/podcast-fetch-episode/tests/python/__init__.py b/apps/podcast-transcribe-episode/src/__init__.py
similarity index 100%
rename from apps/podcast-fetch-episode/tests/python/__init__.py
rename to apps/podcast-transcribe-episode/src/__init__.py
diff --git a/apps/podcast-fetch-transcript/src/python/podcast_fetch_transcript/__init__.py b/apps/podcast-transcribe-episode/src/python/__init__.py
similarity index 100%
rename from apps/podcast-fetch-transcript/src/python/podcast_fetch_transcript/__init__.py
rename to apps/podcast-transcribe-episode/src/python/__init__.py
diff --git a/apps/podcast-fetch-transcript/tests/python/__init__.py b/apps/podcast-transcribe-episode/src/python/podcast_transcribe_episode/__init__.py
similarity index 100%
rename from apps/podcast-fetch-transcript/tests/python/__init__.py
rename to apps/podcast-transcribe-episode/src/python/podcast_transcribe_episode/__init__.py
diff --git a/apps/podcast-fetch-episode/src/python/podcast_fetch_episode/audio_codecs.py b/apps/podcast-transcribe-episode/src/python/podcast_transcribe_episode/audio_codecs.py
similarity index 84%
rename from apps/podcast-fetch-episode/src/python/podcast_fetch_episode/audio_codecs.py
rename to apps/podcast-transcribe-episode/src/python/podcast_transcribe_episode/audio_codecs.py
index 4dafee4146..1529b3b34c 100644
--- a/apps/podcast-fetch-episode/src/python/podcast_fetch_episode/audio_codecs.py
+++ b/apps/podcast-transcribe-episode/src/python/podcast_transcribe_episode/audio_codecs.py
@@ -10,36 +10,32 @@
 
 class AbstractAudioCodec(object, metaclass=abc.ABCMeta):
 
-    @classmethod
-    @abc.abstractmethod
-    def postgresql_enum_value(cls) -> str:
-        """Return value from 'podcast_episodes_audio_codec' PostgreSQL enum."""
-        raise NotImplemented("Abstract method")
-
     @classmethod
     @abc.abstractmethod
     def ffmpeg_stream_is_this_codec(cls, ffmpeg_stream: Dict[str, Any]) -> bool:
         """Return True if ffmpeg.probe()'s one of the streams ('streams' key) is of this codec."""
-        raise NotImplemented("Abstract method")
+        raise NotImplementedError
 
     @classmethod
     @abc.abstractmethod
     def ffmpeg_container_format(cls) -> str:
         """Return FFmpeg container format (-f argument)."""
-        raise NotImplemented("Abstract method")
+        raise NotImplementedError
 
     @classmethod
     @abc.abstractmethod
     def mime_type(cls) -> str:
         """Return MIME type to store as GCS object metadata."""
-        raise NotImplemented("Abstract method")
+        raise NotImplementedError
 
+    @classmethod
+    @abc.abstractmethod
+    def speech_api_codec(cls) -> str:
+        """Return codec enum value to pass to Speech API when submitting the transcription operation."""
+        raise NotImplementedError
 
-class Linear16AudioCodec(AbstractAudioCodec):
 
-    @classmethod
-    def postgresql_enum_value(cls) -> str:
-        return 'LINEAR16'
+class Linear16AudioCodec(AbstractAudioCodec):
 
     @classmethod
     def ffmpeg_stream_is_this_codec(cls, ffmpeg_stream: Dict[str, Any]) -> bool:
@@ -53,12 +49,12 @@ def ffmpeg_container_format(cls) -> str:
     def mime_type(cls) -> str:
         return 'audio/wav'
 
+    @classmethod
+    def speech_api_codec(cls) -> str:
+        return 'LINEAR16'
 
-class FLACAudioCodec(AbstractAudioCodec):
 
-    @classmethod
-    def postgresql_enum_value(cls) -> str:
-        return 'FLAC'
+class FLACAudioCodec(AbstractAudioCodec):
 
     @classmethod
     def ffmpeg_stream_is_this_codec(cls, ffmpeg_stream: Dict[str, Any]) -> bool:
@@ -73,12 +69,12 @@ def ffmpeg_container_format(cls) -> str:
     def mime_type(cls) -> str:
         return 'audio/flac'
 
+    @classmethod
+    def speech_api_codec(cls) -> str:
+        return 'FLAC'
 
-class MULAWAudioCodec(AbstractAudioCodec):
 
-    @classmethod
-    def postgresql_enum_value(cls) -> str:
-        return 'MULAW'
+class MULAWAudioCodec(AbstractAudioCodec):
 
     @classmethod
     def ffmpeg_stream_is_this_codec(cls, ffmpeg_stream: Dict[str, Any]) -> bool:
@@ -92,12 +88,12 @@ def ffmpeg_container_format(cls) -> str:
     def mime_type(cls) -> str:
         return 'audio/basic'
 
+    @classmethod
+    def speech_api_codec(cls) -> str:
+        return 'MULAW'
 
-class OggOpusAudioCodec(AbstractAudioCodec):
 
-    @classmethod
-    def postgresql_enum_value(cls) -> str:
-        return 'OGG_OPUS'
+class OggOpusAudioCodec(AbstractAudioCodec):
 
     @classmethod
     def ffmpeg_stream_is_this_codec(cls, ffmpeg_stream: Dict[str, Any]) -> bool:
@@ -111,12 +107,12 @@ def ffmpeg_container_format(cls) -> str:
     def mime_type(cls) -> str:
         return 'audio/ogg'
 
+    @classmethod
+    def speech_api_codec(cls) -> str:
+        return 'OGG_OPUS'
 
-class MP3AudioCodec(AbstractAudioCodec):
 
-    @classmethod
-    def postgresql_enum_value(cls) -> str:
-        return 'MP3'
+class MP3AudioCodec(AbstractAudioCodec):
 
     @classmethod
     def ffmpeg_stream_is_this_codec(cls, ffmpeg_stream: Dict[str, Any]) -> bool:
@@ -129,3 +125,7 @@ def ffmpeg_container_format(cls) -> str:
     @classmethod
     def mime_type(cls) -> str:
         return 'audio/mpeg'
+
+    @classmethod
+    def speech_api_codec(cls) -> str:
+        return 'MP3'
diff --git a/apps/podcast-fetch-episode/src/python/podcast_fetch_episode/bcp47_lang.py b/apps/podcast-transcribe-episode/src/python/podcast_transcribe_episode/bcp47_lang.py
similarity index 100%
rename from apps/podcast-fetch-episode/src/python/podcast_fetch_episode/bcp47_lang.py
rename to apps/podcast-transcribe-episode/src/python/podcast_transcribe_episode/bcp47_lang.py
diff --git a/apps/podcast-transcribe-episode/src/python/podcast_transcribe_episode/config.py b/apps/podcast-transcribe-episode/src/python/podcast_transcribe_episode/config.py
new file mode 100644
index 0000000000..cd41237dcc
--- /dev/null
+++ b/apps/podcast-transcribe-episode/src/python/podcast_transcribe_episode/config.py
@@ -0,0 +1,120 @@
+import abc
+from typing import Optional
+
+from mediawords.util.config import env_value, file_with_env_value
+
+
+class AbstractGCBucketConfig(object, metaclass=abc.ABCMeta):
+    """
+    Configuration of a single GCS bucket.
+
+    Subclasses supply the defaults; non-empty constructor arguments take precedence over them.
+    """
+
+    __slots__ = [
+        '__bucket_name',
+        '__path_prefix',
+    ]
+
+    def __init__(self, bucket_name: Optional[str] = None, path_prefix: Optional[str] = None):
+        """
+        Constructor.
+
+        Test classes might decide to override those.
+
+        :param bucket_name: Bucket name; the default one is looked up only if this is unset or empty.
+        :param path_prefix: Path prefix; the default one is looked up only if this is unset or empty.
+        """
+        self.__bucket_name = bucket_name or self._default_bucket_name()
+        self.__path_prefix = path_prefix or self._default_path_prefix()
+
+    def bucket_name(self) -> str:
+        """Bucket name to upload objects to / download from."""
+        return self.__bucket_name
+
+    def path_prefix(self) -> str:
+        """Path prefix under which the objects are to be found."""
+        return self.__path_prefix
+
+    @abc.abstractmethod
+    def _default_bucket_name(self) -> str:
+        """Default bucket name to upload objects to / download from."""
+        raise NotImplementedError
+
+    @abc.abstractmethod
+    def _default_path_prefix(self) -> str:
+        """Default path prefix under which the objects are to be found."""
+        raise NotImplementedError
+
+
+class RawEnclosuresGCBucketConfig(AbstractGCBucketConfig):
+    """Bucket for raw enclosures; defaults come from MC_PODCAST_RAW_ENCLOSURES_* environment variables."""
+
+    def _default_bucket_name(self) -> str:
+        return env_value(name='MC_PODCAST_RAW_ENCLOSURES_BUCKET_NAME')
+
+    def _default_path_prefix(self) -> str:
+        return env_value(name='MC_PODCAST_RAW_ENCLOSURES_PATH_PREFIX')
+
+
+class TranscodedEpisodesGCBucketConfig(AbstractGCBucketConfig):
+    """Bucket for transcoded episodes; defaults come from MC_PODCAST_TRANSCODED_EPISODES_* environment variables."""
+
+    def _default_bucket_name(self) -> str:
+        return env_value(name='MC_PODCAST_TRANSCODED_EPISODES_BUCKET_NAME')
+
+    def _default_path_prefix(self) -> str:
+        return env_value(name='MC_PODCAST_TRANSCODED_EPISODES_PATH_PREFIX')
+
+
+class TranscriptsGCBucketConfig(AbstractGCBucketConfig):
+    """Bucket for transcripts; defaults come from MC_PODCAST_TRANSCRIPTS_* environment variables."""
+
+    def _default_bucket_name(self) -> str:
+        return env_value(name='MC_PODCAST_TRANSCRIPTS_BUCKET_NAME')
+
+    def _default_path_prefix(self) -> str:
+        return env_value(name='MC_PODCAST_TRANSCRIPTS_PATH_PREFIX')
+
+
+class GCAuthConfig(object):
+    """Google Cloud authentication configuration."""
+
+    # noinspection PyMethodMayBeStatic
+    def json_file(self) -> str:
+        """Path to Google Cloud authentication JSON file."""
+        return file_with_env_value(name='MC_PODCAST_AUTH_JSON_BASE64', encoded_with_base64=True)
+
+
+class PodcastTranscribeEpisodeConfig(object):
+    """Podcast transcription configuration."""
+
+    # noinspection PyMethodMayBeStatic
+    def max_enclosure_size(self) -> int:
+        """Max. enclosure size (in bytes) that we're willing to download."""
+        return 1024 * 1024 * 500  # 500 MiB
+
+    # noinspection PyMethodMayBeStatic
+    def max_duration(self) -> int:
+        """Max. podcast episode duration (in seconds) to submit for transcription."""
+        return 60 * 60 * 2  # 2 hours
+
+    # noinspection PyMethodMayBeStatic
+    def gc_auth(self) -> GCAuthConfig:
+        """Google Cloud (both Storage and Speech API) authentication configuration."""
+        return GCAuthConfig()
+
+    # noinspection PyMethodMayBeStatic
+    def raw_enclosures(self) -> AbstractGCBucketConfig:
+        """Configuration for GCS bucket where raw enclosures will be stored."""
+        return RawEnclosuresGCBucketConfig()
+
+    # noinspection PyMethodMayBeStatic
+    def transcoded_episodes(self) -> AbstractGCBucketConfig:
+        """Configuration for GCS bucket where transcoded, Speech API-ready episodes will be stored."""
+        return TranscodedEpisodesGCBucketConfig()
+
+    # noinspection PyMethodMayBeStatic
+    def transcripts(self) -> AbstractGCBucketConfig:
+        """Configuration for GCS bucket where JSON transcripts will be stored."""
+        return TranscriptsGCBucketConfig()
diff --git a/apps/podcast-fetch-episode/src/python/podcast_fetch_episode/enclosure.py b/apps/podcast-transcribe-episode/src/python/podcast_transcribe_episode/enclosure.py
similarity index 72%
rename from apps/podcast-fetch-episode/src/python/podcast_fetch_episode/enclosure.py
rename to apps/podcast-transcribe-episode/src/python/podcast_transcribe_episode/enclosure.py
index 6d734c4d8b..f368de5ece 100644
--- a/apps/podcast-fetch-episode/src/python/podcast_fetch_episode/enclosure.py
+++ b/apps/podcast-transcribe-episode/src/python/podcast_transcribe_episode/enclosure.py
@@ -1,6 +1,7 @@
 import dataclasses
 from typing import Optional, Dict, Any
 
+# noinspection PyPackageRequirements
 from furl import furl
 
 from mediawords.db import DatabaseHandler
@@ -9,25 +10,32 @@
 
 log = create_logger(__name__)
 
-_MP3_MIME_TYPES = {'audio/mpeg', 'audio/mpeg3', 'audio/mp3', 'audio/x-mpeg-3'}
-"""MIME types which MP3 files might have."""
-
-MAX_ENCLOSURE_SIZE = 1024 * 1024 * 500
-"""Max. enclosure size (in bytes) that we're willing to download."""
+StoryEnclosureDict = Dict[str, Any]
 
 
 @dataclasses.dataclass
 class StoryEnclosure(object):
     """Single story enclosure derived from feed's <enclosure /> element."""
+
+    __MP3_MIME_TYPES = {'audio/mpeg', 'audio/mpeg3', 'audio/mp3', 'audio/x-mpeg-3'}
+    """MIME types which MP3 files might have."""
+
     story_enclosures_id: int
+    """ID from 'story_enclosures' table."""
+
     url: str
+    """Enclosure's URL, e.g. 'https://www.example.com/episode.mp3'."""
+
     mime_type: Optional[str]
+    """Enclosure's reported MIME type, or None if it wasn't reported; e.g. 'audio/mpeg'."""
+
     length: Optional[int]
+    """Enclosure's reported length in bytes, or None if it wasn't reported."""
 
     def mime_type_is_mp3(self) -> bool:
         """Return True if declared MIME type is one of the MP3 ones."""
         if self.mime_type:
-            if self.mime_type.lower() in _MP3_MIME_TYPES:
+            if self.mime_type.lower() in self.__MP3_MIME_TYPES:
                 return True
         return False
 
@@ -62,8 +70,15 @@ def from_db_row(cls, db_row: Dict[str, Any]) -> 'StoryEnclosure':
             length=db_row['length'],
         )
 
+    def to_dict(self) -> StoryEnclosureDict:
+        return dataclasses.asdict(self)
+
+    @classmethod
+    def from_dict(cls, input_dict: StoryEnclosureDict) -> 'StoryEnclosure':
+        return cls(**input_dict)
+
 
-def podcast_viable_enclosure_for_story(db: DatabaseHandler, stories_id: int) -> Optional[StoryEnclosure]:
+def viable_story_enclosure(db: DatabaseHandler, stories_id: int) -> Optional[StoryEnclosure]:
     """Fetch all enclosures, find and return the one that looks like a podcast episode the most (or None)."""
     story_enclosures_dicts = db.query("""
         SELECT *
@@ -85,14 +100,14 @@ def podcast_viable_enclosure_for_story(db: DatabaseHandler, stories_id: int) ->
 
     for enclosure_dict in story_enclosures_dicts:
         if is_http_url(enclosure_dict['url']):
-            story_enclosures.append(StoryEnclosure.from_db_row(db_row=enclosure_dict))
+            story_enclosures.append(StoryEnclosure.from_db_row(enclosure_dict))
 
     chosen_enclosure = None
 
     # Look for MP3 files in MIME type
     for enclosure in story_enclosures:
         if enclosure.mime_type_is_mp3():
-            log.info(f"Choosing enclosure '{enclosure}' by its MP3 MIME type '{enclosure.mime_type}'")
+            log.info(f"Choosing enclosure '{enclosure}' due to its MP3 MIME type '{enclosure.mime_type}'")
             chosen_enclosure = enclosure
             break
 
@@ -100,7 +115,7 @@ def podcast_viable_enclosure_for_story(db: DatabaseHandler, stories_id: int) ->
     if not chosen_enclosure:
         for enclosure in story_enclosures:
             if enclosure.url_path_has_mp3_extension():
-                log.info(f"Choosing enclosure '{enclosure}' by its URL '{enclosure.url}'")
+                log.info(f"Choosing enclosure '{enclosure}' due to its URL '{enclosure.url}'")
                 chosen_enclosure = enclosure
                 break
 
@@ -109,7 +124,7 @@ def podcast_viable_enclosure_for_story(db: DatabaseHandler, stories_id: int) ->
     if not chosen_enclosure:
         for enclosure in story_enclosures:
             if enclosure.mime_type_is_audio():
-                log.info(f"Choosing enclosure '{enclosure}' by its audio MIME type '{enclosure.mime_type}'")
+                log.info(f"Choosing enclosure '{enclosure}' due to its audio MIME type '{enclosure.mime_type}'")
                 chosen_enclosure = enclosure
                 break
 
@@ -117,7 +132,7 @@ def podcast_viable_enclosure_for_story(db: DatabaseHandler, stories_id: int) ->
     if not chosen_enclosure:
         for enclosure in story_enclosures:
             if enclosure.mime_type_is_video():
-                log.info(f"Choosing enclosure '{enclosure}' by its video MIME type '{enclosure.mime_type}'")
+                log.info(f"Choosing enclosure '{enclosure}' due to its video MIME type '{enclosure.mime_type}'")
                 chosen_enclosure = enclosure
                 break
 
diff --git a/apps/podcast-fetch-episode/src/python/podcast_fetch_episode/fetch_url.py b/apps/podcast-transcribe-episode/src/python/podcast_transcribe_episode/fetch_url.py
similarity index 63%
rename from apps/podcast-fetch-episode/src/python/podcast_fetch_episode/fetch_url.py
rename to apps/podcast-transcribe-episode/src/python/podcast_transcribe_episode/fetch_url.py
index 7d7b6716e6..5d76f9b768 100644
--- a/apps/podcast-fetch-episode/src/python/podcast_fetch_episode/fetch_url.py
+++ b/apps/podcast-transcribe-episode/src/python/podcast_transcribe_episode/fetch_url.py
@@ -1,10 +1,10 @@
 import os
 
+# noinspection PyPackageRequirements
 import requests
 
 from mediawords.util.log import create_logger
-
-from podcast_fetch_episode.exceptions import McPodcastFileFetchFailureException, McPodcastFileStoreFailureException
+from mediawords.workflow.exceptions import McProgrammingError, McPermanentError, McTransientError
 
 log = create_logger(__name__)
 
@@ -22,16 +22,17 @@ def fetch_big_file(url: str, dest_file: str, max_size: int = 0) -> None:
     """
     Fetch a huge file from an URL to a local file.
 
-    Raises on exceptions.
+    Raises one of the _AbstractFetchBigFileException exceptions.
 
     :param url: URL that points to a huge file.
     :param dest_file: Destination path to write the fetched file to.
     :param max_size: If >0, limit the file size to a defined number of bytes.
+    :raise: ProgrammingError on unexpected fatal conditions.
     """
 
     if os.path.exists(dest_file):
         # Something's wrong with the code
-        raise McPodcastFileStoreFailureException(f"Destination file '{dest_file}' already exists.")
+        raise McProgrammingError(f"Destination file '{dest_file}' already exists.")
 
     try:
 
@@ -49,36 +50,31 @@ def fetch_big_file(url: str, dest_file: str, max_size: int = 0) -> None:
                         bytes_read += len(chunk)
                         if max_size:
                             if bytes_read > max_size:
-                                raise McPodcastFileFetchFailureException(
-                                    f"The file is bigger than the max. size of {max_size}"
-                                )
+                                raise McPermanentError(f"The file is bigger than the max. size of {max_size}")
 
                         f.write(chunk)
                         f.flush()
 
-    except McPodcastFileFetchFailureException as ex:
+    except McPermanentError as ex:
 
         __cleanup_dest_file(dest_file=dest_file)
 
-        # Raise fetching failures further as they're soft exceptions
-        raise McPodcastFileFetchFailureException(f"Unable to fetch {url}: {ex}")
+        raise ex
 
     except requests.exceptions.RequestException as ex:
 
         __cleanup_dest_file(dest_file=dest_file)
 
-        # Treat any "requests" exception as a soft failure
-        raise McPodcastFileFetchFailureException(f"'requests' exception while fetching {url}: {ex}")
+        raise McTransientError(f"'requests' exception while fetching {url}: {ex}")
 
     except Exception as ex:
 
         __cleanup_dest_file(dest_file=dest_file)
 
-        # Any other exception is assumed to be a temporary file write problem
-        raise McPodcastFileStoreFailureException(f"Unable to fetch and store {url}: {ex}")
+        raise McTransientError(f"Unable to fetch and store {url}: {ex}")
 
     if not os.path.isfile(dest_file):
         __cleanup_dest_file(dest_file=dest_file)
 
         # There should be something here so in some way it is us that have messed up
-        raise McPodcastFileStoreFailureException(f"Fetched file {dest_file} is not here after fetching it.")
+        raise McProgrammingError(f"Fetched file {dest_file} is not here after fetching it.")
diff --git a/apps/podcast-transcribe-episode/src/python/podcast_transcribe_episode/gcs_store.py b/apps/podcast-transcribe-episode/src/python/podcast_transcribe_episode/gcs_store.py
new file mode 100644
index 0000000000..a41f76209d
--- /dev/null
+++ b/apps/podcast-transcribe-episode/src/python/podcast_transcribe_episode/gcs_store.py
@@ -0,0 +1,268 @@
+import os
+import posixpath
+from typing import Optional
+
+# noinspection PyPackageRequirements
+from google.cloud import storage
+# noinspection PyPackageRequirements
+from google.cloud.exceptions import NotFound
+# noinspection PyPackageRequirements
+from google.cloud.storage import Blob, Bucket
+# noinspection PyPackageRequirements
+from google.cloud.storage.retry import DEFAULT_RETRY
+
+from mediawords.util.log import create_logger
+from mediawords.workflow.exceptions import McProgrammingError, McConfigurationError, McPermanentError, McTransientError
+
+from .config import AbstractGCBucketConfig, GCAuthConfig
+
+log = create_logger(__name__)
+
+_GCS_API_RETRIES = DEFAULT_RETRY.with_delay(initial=5, maximum=60, multiplier=2).with_deadline(deadline=60 * 10)
+"""Google Cloud Storage's retry policy."""
+
+# NOTE(review): not referenced in this module -- uploads and downloads below run with the client library's own
+# defaults; either wire it in or remove it.
+_GCS_UPLOAD_DOWNLOAD_NUM_RETRIES = 10
+"""Number of retries to do when uploading / downloading."""
+
+
+class GCSStore(object):
+    """Google Cloud Storage store."""
+
+    __slots__ = [
+        '__bucket_internal',
+        '__gc_auth_config',
+        '__bucket_config',
+    ]
+
+    def __init__(self, bucket_config: AbstractGCBucketConfig, gc_auth_config: Optional[GCAuthConfig] = None):
+        """
+        Constructor.
+
+        :param bucket_config: Configuration (name and path prefix) of the bucket to operate on.
+        :param gc_auth_config: Google Cloud authentication configuration; defaults to GCAuthConfig().
+        :raise: McConfigurationError if bucket configuration is unset.
+        """
+        if not bucket_config:
+            raise McConfigurationError("Bucket configuration is unset.")
+
+        if not gc_auth_config:
+            gc_auth_config = GCAuthConfig()
+
+        self.__gc_auth_config = gc_auth_config
+        self.__bucket_config = bucket_config
+        self.__bucket_internal = None
+
+    @property
+    def _bucket(self) -> Bucket:
+        """
+        Lazy-loaded bucket.
+
+        :raise: McConfigurationError if the storage client can't be created or the bucket can't be fetched.
+        """
+        if not self.__bucket_internal:
+
+            try:
+                storage_client = storage.Client.from_service_account_json(self.__gc_auth_config.json_file())
+                self.__bucket_internal = storage_client.get_bucket(
+                    bucket_or_name=self.__bucket_config.bucket_name(),
+                    retry=_GCS_API_RETRIES,
+                )
+            except Exception as ex:
+                raise McConfigurationError(
+                    f"Unable to get GCS bucket '{self.__bucket_config.bucket_name()}': {ex}"
+                ) from ex
+
+        return self.__bucket_internal
+
+    @classmethod
+    def _remote_path(cls, path_prefix: str, object_id: str) -> str:
+        """
+        Build the object's path within the bucket.
+
+        GCS object names are always slash-separated, so the path is built with "posixpath" and not with the
+        platform-dependent "os.path".
+
+        :param path_prefix: Path prefix under which the object is to be found.
+        :param object_id: Object ID.
+        :return: Normalized path with no leading slashes.
+        :raise: McProgrammingError if object ID is unset.
+        """
+        if not object_id:
+            raise McProgrammingError("Object ID is unset.")
+
+        path = posixpath.join(path_prefix, object_id)
+
+        # GCS doesn't like double slashes...
+        path = posixpath.normpath(path)
+
+        # ...nor is a fan of slashes at the start of path
+        path = path.lstrip('/')
+
+        return path
+
+    def _blob_from_object_id(self, object_id: str) -> Blob:
+        """
+        Return a blob handle for the object ID.
+
+        :param object_id: Object ID.
+        :return: Blob located under the configured path prefix of the (lazy-loaded) bucket.
+        :raise: McProgrammingError if object ID is unset.
+        """
+        if not object_id:
+            raise McProgrammingError("Object ID is unset.")
+
+        remote_path = self._remote_path(path_prefix=self.__bucket_config.path_prefix(), object_id=object_id)
+        blob = self._bucket.blob(remote_path)
+        return blob
+
+    def object_exists(self, object_id: str) -> bool:
+        """
+        Test if object exists at remote location.
+
+        :param object_id: Object ID that should be tested.
+        :return: True if object already exists under a given object ID.
+        :raise: McProgrammingError if object ID is unset or the test fails for a reason other than "not found".
+        """
+
+        if not object_id:
+            raise McProgrammingError("Object ID is unset.")
+
+        log.debug(f"Testing if object ID {object_id} exists...")
+
+        blob = self._blob_from_object_id(object_id=object_id)
+
+        log.debug(f"Testing blob for existence: {blob}")
+
+        try:
+            # blob.reload() returns metadata too
+            blob.reload(retry=_GCS_API_RETRIES)
+
+        except NotFound as ex:
+            log.debug(f"Object '{object_id}' was not found: {ex}")
+            exists = False
+
+        except Exception as ex:
+            raise McProgrammingError(f"Unable to test whether GCS object {object_id} exists: {ex}") from ex
+
+        else:
+            exists = True
+
+        return exists
+
+    def upload_object(self, local_file_path: str, object_id: str) -> None:
+        """
+        Upload a local file to a GCS object.
+
+        Will overwrite existing objects with a warning.
+
+        :param local_file_path: Local file that should be stored.
+        :param object_id: Object ID under which the object should be stored.
+        :raise: McProgrammingError if the local file doesn't exist or object ID is unset.
+        :raise: McTransientError if the upload itself fails.
+        """
+
+        if not os.path.isfile(local_file_path):
+            raise McProgrammingError(f"Local file '{local_file_path}' does not exist.")
+
+        if not object_id:
+            raise McProgrammingError("Object ID is unset.")
+
+        log.debug(f"Uploading '{local_file_path}' as object ID {object_id}...")
+
+        if self.object_exists(object_id=object_id):
+            log.warning(f"Object {object_id} already exists, will overwrite.")
+
+        blob = self._blob_from_object_id(object_id=object_id)
+
+        try:
+            blob.upload_from_filename(filename=local_file_path, content_type='application/octet-stream')
+        except Exception as ex:
+            raise McTransientError(f"Unable to upload '{local_file_path}' as object ID {object_id}: {ex}") from ex
+
+    def download_object(self, object_id: str, local_file_path: str) -> None:
+        """
+        Download a GCS object to a local file.
+
+        A partially written local file is removed if the download fails.
+
+        :param object_id: Object ID of an object that should be downloaded.
+        :param local_file_path: Local file that the object should be stored to.
+        :raise: McProgrammingError if the local file already exists or object ID is unset.
+        :raise: McPermanentError if the object doesn't exist.
+        :raise: McTransientError if the download itself fails.
+        """
+
+        if os.path.isfile(local_file_path):
+            raise McProgrammingError(f"Local file '{local_file_path}' already exists.")
+
+        if not object_id:
+            raise McProgrammingError("Object ID is unset.")
+
+        log.debug(f"Downloading object ID {object_id} to '{local_file_path}'...")
+
+        if not self.object_exists(object_id=object_id):
+            raise McPermanentError(f"Object ID {object_id} was not found.")
+
+        blob = self._blob_from_object_id(object_id=object_id)
+
+        try:
+            blob.download_to_filename(filename=local_file_path)
+        except Exception as ex:
+            # Don't leave a partial file behind: a retry with the same destination would trip over the
+            # "already exists" check above
+            try:
+                if os.path.isfile(local_file_path):
+                    os.unlink(local_file_path)
+            except OSError as cleanup_ex:
+                log.warning(f"Unable to remove partially downloaded '{local_file_path}': {cleanup_ex}")
+
+            raise McTransientError(f"Unable to download object ID {object_id} to '{local_file_path}': {ex}") from ex
+
+    def delete_object(self, object_id: str) -> None:
+        """
+        Delete object from remote location.
+
+        Doesn't raise if object doesn't exist.
+
+        Used mostly for running tests, e.g. to find out what happens if the object to be fetched doesn't exist anymore.
+
+        :param object_id: Object ID that should be deleted.
+        :raise: McProgrammingError if object ID is unset or the deletion fails for a reason other than "not found".
+        """
+
+        if not object_id:
+            raise McProgrammingError("Object ID is unset.")
+
+        log.debug(f"Deleting object ID {object_id}...")
+
+        blob = self._blob_from_object_id(object_id=object_id)
+
+        try:
+            blob.delete(retry=_GCS_API_RETRIES)
+
+        except NotFound:
+            log.warning(f"Object {object_id} doesn't exist.")
+
+        except Exception as ex:
+            raise McProgrammingError(f"Unable to delete GCS object {object_id}: {ex}") from ex
+
+    def object_uri(self, object_id: str) -> str:
+        """
+        Generate Google Cloud Storage URI for the object.
+
+        :param object_id: Object ID to return the URI for.
+        :return: Full Google Cloud Storage URI of the object, e.g. "gs://<bucket_name>/<path>/<object_id>".
+        :raise: McProgrammingError if object ID is unset.
+        """
+
+        if not object_id:
+            raise McProgrammingError("Object ID is unset.")
+
+        uri = "gs://{host}/{remote_path}".format(
+            host=self.__bucket_config.bucket_name(),
+            remote_path=self._remote_path(path_prefix=self.__bucket_config.path_prefix(), object_id=object_id),
+        )
+
+        return uri
diff --git a/apps/podcast-transcribe-episode/src/python/podcast_transcribe_episode/media_info.py b/apps/podcast-transcribe-episode/src/python/podcast_transcribe_episode/media_info.py
new file mode 100644
index 0000000000..b1bbaaf344
--- /dev/null
+++ b/apps/podcast-transcribe-episode/src/python/podcast_transcribe_episode/media_info.py
@@ -0,0 +1,190 @@
+import dataclasses
+import math
+import os
+from typing import Type, Optional, List, Any, Dict
+
+# noinspection PyPackageRequirements
+import ffmpeg
+
+from mediawords.util.log import create_logger
+from mediawords.workflow.exceptions import McProgrammingError, McPermanentError
+
+from .audio_codecs import (
+    AbstractAudioCodec,
+    Linear16AudioCodec,
+    FLACAudioCodec,
+    MULAWAudioCodec,
+    OggOpusAudioCodec,
+    MP3AudioCodec,
+)
+
+log = create_logger(__name__)
+# Codec classes are kept in a set, so the order in which they get iterated over is not something to rely on
+_SUPPORTED_CODEC_CLASSES = {
+    Linear16AudioCodec,
+    FLACAudioCodec,
+    MULAWAudioCodec,
+    OggOpusAudioCodec,
+    MP3AudioCodec,
+}
+"""Supported native audio codec classes."""
+# Plain dictionary form of MediaFileInfoAudioStream, as returned by its to_dict() and accepted by its from_dict()
+MediaFileInfoAudioStreamDict = Dict[str, Any]
+
+
+@dataclasses.dataclass
+class MediaFileInfoAudioStream(object):
+    """Information about a single audio stream in a media file."""
+
+    ffmpeg_stream_index: int
+    """Stream index as reported by FFmpeg's probe ('index' of the stream entry)."""
+
+    audio_codec_class: Optional[Type[AbstractAudioCodec]]
+    """Audio codec class if the stream is one of the supported types and has single (mono) channel, None otherwise."""
+
+    duration: int
+    """Duration (in seconds)."""
+
+    audio_channel_count: int
+    """Audio channel count."""
+
+    sample_rate: int
+    """Audio sample rate."""
+
+    def to_dict(self) -> MediaFileInfoAudioStreamDict:
+        return {
+            'ffmpeg_stream_index': self.ffmpeg_stream_index,
+            'audio_codec_class': self.audio_codec_class.__name__ if self.audio_codec_class else None,
+            'duration': self.duration,
+            'audio_channel_count': self.audio_channel_count,
+            'sample_rate': self.sample_rate,
+        }
+
+    @classmethod
+    def from_dict(cls, input_dict: MediaFileInfoAudioStreamDict) -> 'MediaFileInfoAudioStream':
+        """Rebuild a stream from to_dict() output; an unknown codec class name raises KeyError."""
+        # Resolve the name among the supported codecs only, so the input can't select arbitrary module-level objects
+        by_name = {codec.__name__: codec for codec in _SUPPORTED_CODEC_CLASSES}
+        return cls(
+            ffmpeg_stream_index=input_dict['ffmpeg_stream_index'],
+            audio_codec_class=by_name[input_dict['audio_codec_class']] if input_dict['audio_codec_class'] else None,
+            duration=input_dict['duration'],
+            audio_channel_count=input_dict['audio_channel_count'],
+            sample_rate=input_dict['sample_rate'],
+        )
+
+
+@dataclasses.dataclass
+class MediaFileInfo:
+    """What is known about a media file."""
+
+    audio_streams: List[MediaFileInfoAudioStream]
+    """Audio streams that were found in the media file."""
+
+    has_video_streams: bool
+    """Whether the media file contains any video streams."""
+
+    def best_supported_audio_stream(self) -> Optional[MediaFileInfoAudioStream]:
+        """Pick the earliest listed stream that has a codec class set, or None if no stream has one."""
+        # Streams without a codec class are the ones that are not supported as they are
+        supported_streams = (candidate for candidate in self.audio_streams if candidate.audio_codec_class)
+        # Default of None stands in for "nothing matched"
+        return next(supported_streams, None)
+
+
+def media_file_info(media_file_path: str) -> MediaFileInfo:
+    """
+    Read audio / video media file information, or raise if it can't be read.
+
+    :param media_file_path: Full path to media file.
+    :return: MediaFileInfo object.
+    :raise McProgrammingError: If the file doesn't exist, or the probe result lacks the 'streams' key.
+    :raise McPermanentError: If FFmpeg is unable to read metadata from the file.
+    """
+    if not os.path.isfile(media_file_path):
+        # Input file should exist at this point; if it doesn't, we have probably messed up something in the code
+        raise McProgrammingError(f"Input file {media_file_path} does not exist.")
+
+    try:
+        file_info = ffmpeg.probe(media_file_path)
+        if not file_info:
+            raise Exception("Returned metadata is empty.")
+    except Exception as ex:
+        raise McPermanentError(f"Unable to read metadata from file {media_file_path}: {ex}")
+
+    if 'streams' not in file_info:
+        # FFmpeg should come up with some sort of a stream in any case
+        raise McProgrammingError("Returned probe doesn't have 'streams' key.")
+
+    # Test if one of the audio streams is of one of the supported codecs
+    audio_streams = []
+    has_video_streams = False
+    for stream in file_info['streams']:
+        # Not every stream entry is guaranteed to carry 'codec_type', so don't let a missing key abort the probe
+        if stream.get('codec_type') == 'audio':
+            try:
+                audio_channel_count = int(stream['channels'])
+                if audio_channel_count == 0:
+                    raise Exception("Audio channel count is 0")
+            except Exception as ex:
+                log.warning(f"Unable to read audio channel count from stream {stream}: {ex}")
+                # Just skip this stream if we can't figure it out
+                continue
+
+            audio_codec_class = None
+            # We'll need to transcode audio files with more than one channel count anyway
+            if audio_channel_count == 1:
+                for codec in _SUPPORTED_CODEC_CLASSES:
+                    if codec.ffmpeg_stream_is_this_codec(ffmpeg_stream=stream):
+                        audio_codec_class = codec
+                        break
+
+            try:
+                if 'duration' in stream:
+                    # 'duration': '3.766621'
+                    duration = math.floor(float(stream['duration']))
+
+                elif 'DURATION' in stream.get('tags', {}):
+                    # 'DURATION': '00:00:03.824000000'
+                    duration_parts = stream['tags']['DURATION'].split(':')
+                    if len(duration_parts) != 3:
+                        raise McPermanentError(f"Unable to parse 'DURATION': {duration_parts}")
+
+                    hh = int(duration_parts[0])
+                    mm = int(duration_parts[1])
+                    ss_ms = duration_parts[2].split('.')
+
+                    if len(ss_ms) == 1:
+                        ss = int(ss_ms[0])
+                        ms = 0
+                    elif len(ss_ms) == 2:
+                        ss = int(ss_ms[0])
+                        ms = int(ss_ms[1])
+                    else:
+                        raise McPermanentError(f"Unable to parse 'DURATION': {duration_parts}")
+
+                    duration = hh * 3600 + mm * 60 + ss + (1 if ms > 0 else 0)
+
+                else:
+                    raise McPermanentError(f"Stream doesn't have duration: {stream}")
+
+                audio_stream = MediaFileInfoAudioStream(
+                    ffmpeg_stream_index=stream['index'],
+                    audio_codec_class=audio_codec_class,
+                    duration=duration,
+                    audio_channel_count=audio_channel_count,
+                    sample_rate=int(stream['sample_rate']),
+                )
+                audio_streams.append(audio_stream)
+
+            except Exception as ex:
+                # Just skip this stream if we can't figure it out
+                log.warning(f"Unable to read audio stream data for stream {stream}: {ex}")
+
+        elif stream.get('codec_type') == 'video':
+            has_video_streams = True
+
+    return MediaFileInfo(
+        audio_streams=audio_streams,
+        has_video_streams=has_video_streams,
+    )
diff --git a/apps/podcast-transcribe-episode/src/python/podcast_transcribe_episode/speech_api.py b/apps/podcast-transcribe-episode/src/python/podcast_transcribe_episode/speech_api.py
new file mode 100644
index 0000000000..b7c100677b
--- /dev/null
+++ b/apps/podcast-transcribe-episode/src/python/podcast_transcribe_episode/speech_api.py
@@ -0,0 +1,195 @@
+from typing import Optional
+
+# noinspection PyPackageRequirements
+from google.api_core.exceptions import InvalidArgument, NotFound
+# noinspection PyPackageRequirements
+from google.api_core.operation import from_gapic, Operation
+# noinspection PyPackageRequirements
+from google.api_core.retry import Retry
+# noinspection PyPackageRequirements
+from google.cloud.speech_v1p1beta1 import (
+    SpeechClient, RecognitionConfig, RecognitionAudio, LongRunningRecognizeResponse, LongRunningRecognizeMetadata,
+)
+
+from mediawords.util.log import create_logger
+from mediawords.workflow.exceptions import McProgrammingError
+
+from .config import GCAuthConfig
+from .transcript import Transcript, UtteranceAlternative, Utterance
+from .media_info import MediaFileInfoAudioStream
+
+log = create_logger(__name__)
+
+# Speech API sometimes throws:
+#
+#   google.api_core.exceptions.ServiceUnavailable: 503 failed to connect to all addresses
+#
+# so let it retry with exponential backoff (5 s initially, doubling up to 60 s) for up to 10 minutes.
+_GOOGLE_API_RETRIES = Retry(initial=5, maximum=60, multiplier=2, deadline=60 * 10)
+"""Google Cloud API's own retry policy."""
+
+
+def submit_transcribe_operation(gs_uri: str,
+                                episode_metadata: MediaFileInfoAudioStream,
+                                bcp47_language_code: str,
+                                gc_auth_config: Optional[GCAuthConfig] = None) -> str:
+    """
+    Start a long running Speech API operation that transcribes a podcast episode.
+
+    :param gs_uri: Google Cloud Storage URI of the transcoded episode.
+    :param episode_metadata: Audio stream metadata that was derived from the episode while transcoding it.
+    :param bcp47_language_code: BCP 47 language code of the episode, guessed from story's title + description.
+    :param gc_auth_config: Google Cloud authentication configuration; a default instance is created if unset.
+    :return: ID of the Speech API operation by which the transcription can be referred to later on.
+    """
+
+    auth_config = gc_auth_config or GCAuthConfig()
+
+    try:
+        speech_client = SpeechClient.from_service_account_json(auth_config.json_file())
+    except Exception as ex:
+        raise McProgrammingError(f"Unable to create Speech API client: {ex}")
+
+    try:
+        # noinspection PyTypeChecker
+        recognition_config = RecognitionConfig(
+            encoding=getattr(RecognitionConfig.AudioEncoding, episode_metadata.audio_codec_class.speech_api_codec()),
+            sample_rate_hertz=episode_metadata.sample_rate,
+            # Inputs are always mono and don't carry a separate speaker per audio channel, hence the single channel
+            # and no separate recognition per channel
+            audio_channel_count=1,
+            enable_separate_recognition_per_channel=False,
+            language_code=bcp47_language_code,
+            alternative_language_codes=[],
+            speech_contexts=[
+                # No custom contexts as Speech API does well enough without them
+            ],
+            # Word confidence is of little interest
+            enable_word_confidence=False,
+            # Enabled even though punctuation is not that accurate
+            enable_automatic_punctuation=True,
+            # 'model' is left unset so that 'use_enhanced' could choose the best model by itself;
+            # enhanced (more expensive) model gets used where it is available
+            use_enhanced=True,
+        )
+    except Exception as ex:
+        raise McProgrammingError(f"Unable to initialize Speech API configuration: {ex}")
+
+    log.info(f"Submitting a Speech API operation for URI {gs_uri}...")
+
+    try:
+        # noinspection PyTypeChecker
+        episode_audio = RecognitionAudio(uri=gs_uri)
+        submitted_operation = speech_client.long_running_recognize(
+            config=recognition_config,
+            audio=episode_audio,
+            retry=_GOOGLE_API_RETRIES,
+        )
+    except Exception as ex:
+        # Client retries on its own, so a failure that still gets here is likely a programming error (outdated client?)
+        raise McProgrammingError(f"Unable to submit a Speech API operation: {ex}")
+
+    try:
+        # How to get to the operation name is not documented that well, so the property names might change without
+        # us noticing; reading the name in a try-except block makes such a change surface as an explicit error
+        operation_name = submitted_operation.operation.name
+        if not operation_name:
+            raise McProgrammingError("Operation name is empty.")
+    except Exception as ex:
+        raise McProgrammingError(f"Unable to get operation name: {ex}")
+
+    log.info(f"Submitted Speech API operation for URI {gs_uri}")
+
+    return operation_name
+
+
+def fetch_transcript(speech_operation_id: str, gc_auth_config: Optional[GCAuthConfig] = None) -> Optional[Transcript]:
+    """
+    Try to fetch a transcript for a given speech operation ID.
+
+    :param speech_operation_id: Speech operation ID.
+    :param gc_auth_config: Google Cloud authentication configuration instance.
+    :return: Transcript, or None if the transcript hasn't been prepared yet.
+    :raise McProgrammingError: If the operation ID is unset, or fetching the operation or reading its result fails.
+    """
+    if not speech_operation_id:
+        raise McProgrammingError(f"Speech operation ID is unset.")
+
+    if not gc_auth_config:
+        gc_auth_config = GCAuthConfig()
+
+    try:
+        client = SpeechClient.from_service_account_json(gc_auth_config.json_file())
+    except Exception as ex:
+        raise McProgrammingError(f"Unable to initialize Speech API operations client: {ex}")
+
+    try:
+        operation = client.transport.operations_client.get_operation(
+            name=speech_operation_id,
+            retry=_GOOGLE_API_RETRIES,
+        )
+    except InvalidArgument as ex:
+        raise McProgrammingError(f"Invalid operation ID '{speech_operation_id}': {ex}")
+    except NotFound as ex:
+        raise McProgrammingError(f"Operation ID '{speech_operation_id}' was not found: {ex}")
+    except Exception as ex:
+        # On any other errors, raise a hard exception
+        raise McProgrammingError(f"Error while fetching operation ID '{speech_operation_id}': {ex}")
+
+    if not operation:
+        raise McProgrammingError(f"Operation is unset.")
+
+    try:
+        gapic_operation: Operation = from_gapic(
+            operation=operation,
+            operations_client=client.transport.operations_client,
+            result_type=LongRunningRecognizeResponse,
+            metadata_type=LongRunningRecognizeMetadata,
+            retry=_GOOGLE_API_RETRIES,
+        )
+    except Exception as ex:
+        raise McProgrammingError(f"Unable to create GAPIC operation: {ex}")
+
+    try:
+        # done() may poll the API, so call it once, with retries, and only where its failures get handled
+        operation_is_done = gapic_operation.done(retry=_GOOGLE_API_RETRIES)
+    except Exception as ex:
+        # 'done' attribute might be gone in a newer version of the Speech API client
+        raise McProgrammingError(
+            f"Unable to test whether operation '{speech_operation_id}' is done: {ex}"
+        )
+
+    log.debug(f"GAPIC operation: {gapic_operation}")
+    log.debug(f"Operation metadata: {gapic_operation.metadata}")
+    log.debug(f"Operation is done: {operation_is_done}")
+    log.debug(f"Operation error: {gapic_operation.operation.error}")
+
+    if not operation_is_done:
+        log.info(f"Operation '{speech_operation_id}' is still not done.")
+        return None
+
+    utterances = []
+
+    try:
+        for result in gapic_operation.result(retry=_GOOGLE_API_RETRIES).results:
+            alternatives = []
+            for alternative in result.alternatives:
+                alternatives.append(
+                    UtteranceAlternative(
+                        text=alternative.transcript.strip(),
+                        confidence=alternative.confidence,
+                    )
+                )
+            utterances.append(
+                Utterance(
+                    alternatives=alternatives,
+                    bcp47_language_code=result.language_code,
+                )
+            )
+
+    except Exception as ex:
+        raise McProgrammingError(
+            f"Unable to read transcript for operation '{speech_operation_id}' due to other error: {ex}"
+        )
+
+    return Transcript(utterances=utterances)
diff --git a/apps/podcast-transcribe-episode/src/python/podcast_transcribe_episode/transcode.py b/apps/podcast-transcribe-episode/src/python/podcast_transcribe_episode/transcode.py
new file mode 100644
index 0000000000..627430e2e0
--- /dev/null
+++ b/apps/podcast-transcribe-episode/src/python/podcast_transcribe_episode/transcode.py
@@ -0,0 +1,92 @@
+import subprocess
+import os
+
+from mediawords.util.log import create_logger
+from mediawords.workflow.exceptions import McProgrammingError, McPermanentError
+
+from .media_info import media_file_info
+
+log = create_logger(__name__)
+
+
+def maybe_transcode_file(input_file: str, maybe_output_file: str) -> bool:
+    """
+    Transcode file (if needed) to something that Speech API will support.
+
+    * If input has a video stream, it will be discarded;
+    * If input has more than one audio stream, others will be discarded leaving only one (preferably the one that Speech
+      API can support);
+    * If input doesn't have an audio stream in Speech API-supported codec, it will be transcoded to lossless
+      FLAC 16 bit in order to preserve quality;
+    * If the chosen audio stream has multiple channels (e.g. stereo or 5.1), it will be mixed into a single (mono)
+      channel as Speech API supports multi-channel recognition only when different voices speak into each of the
+      channels.
+
+    :param input_file: Input media file to consider for transcoding.
+    :param maybe_output_file: If we decide to transcode, output media file to transcode to.
+    :return: True if file had to be transcoded into "maybe_output_file", or False if input file can be used as it is.
+    :raise McProgrammingError: If the input file does not exist.
+    :raise McPermanentError: If the input file doesn't have any audio streams.
+    """
+
+    if not os.path.isfile(input_file):
+        raise McProgrammingError(f"File '{input_file}' does not exist.")
+
+    # Independently from what <enclosure /> has told us, identify the file type again ourselves
+    media_info = media_file_info(media_file_path=input_file)
+
+    if not media_info.audio_streams:
+        raise McPermanentError("Downloaded file doesn't appear to have any audio streams.")
+
+    ffmpeg_args = []
+
+    supported_audio_stream = media_info.best_supported_audio_stream()
+    if supported_audio_stream:
+        log.info(f"Found a supported audio stream")
+
+        # Other audio streams or video have to go; that takes an explicit container format and a positive mapping
+        if len(media_info.audio_streams) > 1 or media_info.has_video_streams:
+            log.info("Found other streams besides the supported audio stream, will discard those")
+            ffmpeg_args.extend(['-f', supported_audio_stream.audio_codec_class.ffmpeg_container_format()])
+            # Select all audio streams
+            ffmpeg_args.extend(['-map', '0:a'])
+
+            for stream in media_info.audio_streams:
+                # Deselect the unsupported streams; the index is absolute within the input, so no "a:" qualifier
+                if stream != supported_audio_stream:
+                    ffmpeg_args.extend(['-map', f'-0:{stream.ffmpeg_stream_index}'])
+
+    # If a stream of a supported codec was not found, transcode it to FLAC 16 bit in order to not lose any quality
+    else:
+        log.info(f"None of the audio streams are supported by the Speech API, will transcode to FLAC")
+
+        # Map first audio stream to input 0
+        ffmpeg_args.extend(['-map', '0:a:0'])
+
+        # Transcode to FLAC (16 bit) in order to not lose any quality
+        ffmpeg_args.extend(['-acodec', 'flac'])
+        ffmpeg_args.extend(['-f', 'flac'])
+        ffmpeg_args.extend(['-sample_fmt', 's16'])
+
+        # Ensure that we end up with mono audio
+        ffmpeg_args.extend(['-ac', '1'])
+
+    # If the file has video streams, remove them
+    if media_info.has_video_streams:
+        # Discard all video streams
+        ffmpeg_args.extend(['-map', '-0:v'])
+
+    if not ffmpeg_args:
+        # No need to transcode -- caller should use the input file as-is
+        return False
+
+    log.info(f"Transcoding '{input_file}' to '{maybe_output_file}'...")
+
+    # I wasn't sure how to map outputs in "ffmpeg-python" library so here we call ffmpeg directly
+    ffmpeg_command = ['ffmpeg', '-nostdin', '-hide_banner', '-i', input_file] + ffmpeg_args + [maybe_output_file]
+    log.debug(f"FFmpeg command: {ffmpeg_command}")
+    subprocess.check_call(ffmpeg_command)
+
+    log.info(f"Done transcoding '{input_file}' to '{maybe_output_file}'")
+
+    return True
diff --git a/apps/podcast-transcribe-episode/src/python/podcast_transcribe_episode/transcript.py b/apps/podcast-transcribe-episode/src/python/podcast_transcribe_episode/transcript.py
new file mode 100644
index 0000000000..a497ee3bcd
--- /dev/null
+++ b/apps/podcast-transcribe-episode/src/python/podcast_transcribe_episode/transcript.py
@@ -0,0 +1,75 @@
+import abc
+import dataclasses
+from typing import List, Dict, Any
+
+
+class _AbstractFromDict(metaclass=abc.ABCMeta):
+    """Base for classes that can be constructed from a plain dictionary by calling from_dict()."""
+    @classmethod
+    @abc.abstractmethod
+    def from_dict(cls, input_dict: Dict[str, Any]) -> '_AbstractFromDict':
+        raise NotImplementedError
+
+
+@dataclasses.dataclass
+class UtteranceAlternative:
+    """A single candidate for what might have been said in an utterance."""
+
+    text: str
+    """Text of this candidate."""
+
+    confidence: float
+    """Speech API's confidence in this candidate being the right one."""
+
+    @classmethod
+    def from_dict(cls, input_dict: Dict[str, Any]) -> 'UtteranceAlternative':
+        """Build an alternative from its dictionary form; both keys have to be present."""
+        text, confidence = input_dict['text'], input_dict['confidence']
+        # Keyword arguments keep the mapping of keys to fields explicit
+        return cls(text=text, confidence=confidence)
+
+
+@dataclasses.dataclass
+class Utterance:
+    """One transcribed utterance, which frequently (though not necessarily) amounts to a single sentence."""
+
+    alternatives: List[UtteranceAlternative]
+    """Candidates for what might have been said in the utterance, best guess first."""
+
+    bcp47_language_code: str
+    """BCP 47 language code of the utterance; not necessarily the one that was passed as the input."""
+
+    @property
+    def best_alternative(self) -> UtteranceAlternative:
+        """Top candidate for what was said, i.e. the first entry of the alternatives."""
+        return self.alternatives[0]
+
+    @classmethod
+    def from_dict(cls, input_dict: Dict[str, Any]) -> 'Utterance':
+        """Build an utterance, together with its nested alternatives, from the dictionary form."""
+        parsed_alternatives = list(map(UtteranceAlternative.from_dict, input_dict['alternatives']))
+        language_code = input_dict['bcp47_language_code']
+        return cls(alternatives=parsed_alternatives, bcp47_language_code=language_code)
+
+
+@dataclasses.dataclass
+class Transcript:
+    """One complete transcript."""
+
+    utterances: List[Utterance]
+    """Utterances that make up the transcript, in order."""
+
+    # to_dict() exists only on Transcript as it is the only class that gets serialized to JSON
+    def to_dict(self) -> Dict[str, Any]:
+        return dataclasses.asdict(self)
+
+    @classmethod
+    def from_dict(cls, input_dict: Dict[str, Any]) -> 'Transcript':
+        return cls(utterances=list(map(Utterance.from_dict, input_dict['utterances'])))
+
+    def download_text_from_transcript(self) -> str:
+        """Assemble plain text from the best alternative of every utterance, in transcript order."""
+        best_texts = (utterance.best_alternative.text for utterance in self.utterances)
+        # Utterances get separated by an empty line
+        joined_text = "\n\n".join(best_texts)
+        return joined_text
diff --git a/apps/podcast-transcribe-episode/src/python/podcast_transcribe_episode/workflow.py b/apps/podcast-transcribe-episode/src/python/podcast_transcribe_episode/workflow.py
new file mode 100644
index 0000000000..9788b0da60
--- /dev/null
+++ b/apps/podcast-transcribe-episode/src/python/podcast_transcribe_episode/workflow.py
@@ -0,0 +1,322 @@
+import os
+import tempfile
+from typing import Optional
+
+# noinspection PyPackageRequirements
+from temporal.workflow import Workflow
+
+from mediawords.db import connect_to_db_or_raise
+from mediawords.dbi.downloads.store import store_content
+from mediawords.job import JobBroker
+from mediawords.util.parse_json import encode_json, decode_json
+from mediawords.util.config.common import RabbitMQConfig
+from mediawords.util.identify_language import identification_would_be_reliable, language_code_for_text
+from mediawords.util.log import create_logger
+from mediawords.util.parse_html import html_strip
+from mediawords.util.url import get_url_host
+from mediawords.workflow.exceptions import McProgrammingError, McTransientError, McPermanentError
+
+from .config import PodcastTranscribeEpisodeConfig
+from .enclosure import viable_story_enclosure, StoryEnclosure, StoryEnclosureDict
+from .fetch_url import fetch_big_file
+from .gcs_store import GCSStore
+from .bcp47_lang import iso_639_1_code_to_bcp_47_identifier
+from .media_info import MediaFileInfoAudioStream, media_file_info, MediaFileInfoAudioStreamDict
+from .speech_api import submit_transcribe_operation, fetch_transcript
+from .transcode import maybe_transcode_file
+from .transcript import Transcript
+from .workflow_interface import PodcastTranscribeWorkflow, PodcastTranscribeActivities
+
+log = create_logger(__name__)
+
+
+class PodcastTranscribeActivitiesImpl(PodcastTranscribeActivities):
+    """Activities implementation."""
+
+    async def identify_story_bcp47_language_code(self, stories_id: int) -> Optional[str]:
+        """Guess story's BCP 47 language code from title + description; None if it can't be done reliably."""
+        log.info(f"Identifying story language for story {stories_id}...")
+
+        db = connect_to_db_or_raise()
+
+        story = db.find_by_id(table='stories', object_id=stories_id)
+        if not story:
+            raise McPermanentError(f"Story {stories_id} was not found.")
+
+        # Title and description are usually set for podcast episodes, so they serve as the text sample
+        title = story['title']
+        description = html_strip(story['description'])
+        sample_text = f"{title}\n{description}"
+
+        if identification_would_be_reliable(text=sample_text):
+            # Language gets identified as an ISO 639-1 code which then is converted to BCP 47 identifier
+            bcp_47_language_code = iso_639_1_code_to_bcp_47_identifier(
+                iso_639_1_code=language_code_for_text(text=sample_text),
+                url_hint=story['url'],
+            )
+        else:
+            bcp_47_language_code = None
+
+        log.info(f"Language code for story {stories_id} is {bcp_47_language_code}")
+
+        return bcp_47_language_code
+
+    async def determine_best_enclosure(self, stories_id: int) -> Optional[StoryEnclosureDict]:
+        """Choose the most viable enclosure of a story and return it in its dictionary form."""
+        log.info(f"Determining best enclosure for story {stories_id}...")
+
+        # Pick the enclosure that is the most likely to work out
+        chosen = viable_story_enclosure(db=connect_to_db_or_raise(), stories_id=stories_id)
+        if not chosen:
+            raise McPermanentError(f"There were no viable enclosures found for story {stories_id}")
+
+        # Size limit gets enforced only when enclosure's length is set
+        if chosen.length and chosen.length > self.config.max_enclosure_size():
+            raise McPermanentError(f"Chosen enclosure {chosen} is too big.")
+
+        log.info(f"Done determining best enclosure for story {stories_id}")
+        log.debug(f"Best enclosure for story {stories_id}: {chosen}")
+
+        enclosure_dict = chosen.to_dict()
+
+        return enclosure_dict
+
+    async def fetch_enclosure_to_gcs(self, stories_id: int, enclosure: StoryEnclosureDict) -> None:
+        """Download the enclosure into a temporary directory and upload it to the raw enclosures store."""
+        log.info(f"Fetching enclosure to GCS for story {stories_id}")
+        log.debug(f"Best enclosure for story {stories_id}: {enclosure}")
+
+        story_enclosure = StoryEnclosure.from_dict(enclosure)
+
+        with tempfile.TemporaryDirectory(prefix='fetch_enclosure_to_gcs') as scratch_dir:
+            local_path = os.path.join(scratch_dir, 'raw_enclosure')
+            fetch_big_file(url=story_enclosure.url, dest_file=local_path, max_size=self.config.max_enclosure_size())
+
+            # Empty file is something that misconfigured webservers might serve
+            if not os.path.getsize(local_path):
+                raise McPermanentError(f"Fetched file {local_path} is empty.")
+
+            raw_enclosures_store = GCSStore(bucket_config=self.config.raw_enclosures())
+            raw_enclosures_store.upload_object(local_file_path=local_path, object_id=str(stories_id))
+
+        log.info(f"Done fetching enclosure to GCS for story {stories_id}")
+
+    async def fetch_transcode_store_episode(self, stories_id: int) -> MediaFileInfoAudioStreamDict:
+        """Get raw enclosure from GCS, transcode it if needed, store the result, return its audio stream info."""
+        log.info(f"Fetching, transcoding, storing episode for story {stories_id}...")
+
+        with tempfile.TemporaryDirectory(prefix='fetch_transcode_store_episode') as temp_dir:
+            raw_enclosure_path = os.path.join(temp_dir, 'raw_enclosure')
+
+            gcs_raw_enclosures = GCSStore(bucket_config=self.config.raw_enclosures())
+            gcs_raw_enclosures.download_object(
+                object_id=str(stories_id),
+                local_file_path=raw_enclosure_path,
+            )
+            del gcs_raw_enclosures
+
+            if os.stat(raw_enclosure_path).st_size == 0:
+                # If somehow the file from GCS ended up being of zero length, then this is very much unexpected
+                raise McProgrammingError(f"Fetched file {raw_enclosure_path} is empty.")
+
+            transcoded_episode_path = os.path.join(temp_dir, 'transcoded_episode')
+
+            raw_enclosure_transcoded = maybe_transcode_file(
+                input_file=raw_enclosure_path,
+                maybe_output_file=transcoded_episode_path,
+            )
+            if not raw_enclosure_transcoded:
+                transcoded_episode_path = raw_enclosure_path
+
+            del raw_enclosure_path
+
+            gcs_transcoded_episodes = GCSStore(bucket_config=self.config.transcoded_episodes())
+            gcs_transcoded_episodes.upload_object(local_file_path=transcoded_episode_path, object_id=str(stories_id))
+
+            # (Re)read the properties of either the original or the transcoded file
+            media_info = media_file_info(media_file_path=transcoded_episode_path)
+            best_audio_stream = media_info.best_supported_audio_stream()
+            # None gets returned when none of the streams are supported, so test for that before reading attributes
+            if not best_audio_stream or not best_audio_stream.audio_codec_class:
+                raise McProgrammingError("Best audio stream doesn't have audio class set")
+
+        log.info(f"Done fetching, transcoding, storing episode for story {stories_id}")
+        log.debug(f"Best audio stream for story {stories_id}: {best_audio_stream}")
+
+        return best_audio_stream.to_dict()
+
+    async def submit_transcribe_operation(self,
+                                          stories_id: int,
+                                          episode_metadata: MediaFileInfoAudioStreamDict,
+                                          bcp47_language_code: str) -> str:
+        """Submit story's transcoded episode to Speech API and return the ID of the resulting operation."""
+        log.info(f"Submitting transcribe operation for story {stories_id}...")
+        log.debug(f"Episode metadata for story {stories_id}: {episode_metadata}")
+        log.debug(f"Language code for story {stories_id}: {bcp47_language_code}")
+
+        audio_stream = MediaFileInfoAudioStream.from_dict(episode_metadata)
+        if not audio_stream.audio_codec_class:
+            raise McProgrammingError("Best audio stream doesn't have audio class set")
+
+        # The transcoded episode gets referred to by its GCS URI
+        transcoded_store = GCSStore(bucket_config=self.config.transcoded_episodes())
+        episode_uri = transcoded_store.object_uri(object_id=str(stories_id))
+
+        operation_id = submit_transcribe_operation(
+            gs_uri=episode_uri,
+            episode_metadata=audio_stream,
+            bcp47_language_code=bcp47_language_code,
+            gc_auth_config=self.config.gc_auth(),
+        )
+
+        log.info(f"Done submitting transcribe operation for story {stories_id}")
+        log.debug(f"Speech operation ID for story {stories_id}: {operation_id}")
+
+        return operation_id
+
+    async def fetch_store_raw_transcript_json(self, stories_id: int, speech_operation_id: str) -> None:
+        """Fetch the finished transcript from Speech API and store its JSON form in the transcripts store."""
+        log.info(f"Fetching and storing raw transcript JSON for story {stories_id}...")
+        log.debug(f"Speech operation ID: {speech_operation_id}")
+
+        transcript = fetch_transcript(speech_operation_id=speech_operation_id, gc_auth_config=self.config.gc_auth())
+        # None stands for a transcript that hasn't been prepared yet
+        if transcript is None:
+            raise McTransientError(f"Speech operation with ID '{speech_operation_id}' hasn't been completed yet.")
+
+        serialized_transcript = encode_json(transcript.to_dict())
+
+        with tempfile.TemporaryDirectory(prefix='fetch_store_raw_transcript_json') as scratch_dir:
+            json_path = os.path.join(scratch_dir, 'transcript.json')
+            with open(json_path, 'w') as json_file:
+                json_file.write(serialized_transcript)
+
+            transcripts_store = GCSStore(bucket_config=self.config.transcripts())
+            transcripts_store.upload_object(local_file_path=json_path, object_id=str(stories_id))
+
+        log.info(f"Done fetching and storing raw transcript JSON for story {stories_id}")
+
+    async def fetch_store_transcript(self, stories_id: int) -> None:
+
+        log.info(f"Fetching and storing transcript for story {stories_id}...")
+
+        with tempfile.TemporaryDirectory(prefix='fetch_store_transcript') as temp_dir:
+            transcript_json_path = os.path.join(temp_dir, 'transcript.json')
+
+            gcs = GCSStore(bucket_config=self.config.transcripts())
+            gcs.download_object(object_id=str(stories_id), local_file_path=transcript_json_path)
+
+            with open(transcript_json_path, 'r') as f:
+                transcript_json = f.read()
+
+        transcript = Transcript.from_dict(decode_json(transcript_json))
+
+        db = connect_to_db_or_raise()
+
+        story = db.find_by_id(table='stories', object_id=stories_id)
+
+        feed = db.query("""
+            SELECT *
+            FROM feeds
+            WHERE feeds_id = (
+                SELECT feeds_id
+                FROM feeds_stories_map
+                WHERE stories_id = %(stories_id)s
+            )
+        """, {
+            'stories_id': stories_id,
+        }).hash()
+
+        # Just like create_download_for_new_story(), it creates a new download except that it tests if such a download
+        # exists first
+        download = db.find_or_create(
+            table='downloads',
+            insert_hash={
+                'feeds_id': feed['feeds_id'],
+                'stories_id': story['stories_id'],
+                'url': story['url'],
+                'host': get_url_host(story['url']),
+                'type': 'content',
+                'sequence': 1,
+                'state': 'success',
+                'path': 'content:pending',
+                'priority': 1,
+                'extracted': 'f'
+            },
+        )
+
+        text = transcript.download_text_from_transcript()
+
+        # Store as a raw download and then let "extract-and-vector" app "extract" the stored text later
+        store_content(db=db, download=download, content=text)
+
+        log.info(f"Done fetching and storing transcript for story {stories_id}")
+
+    async def add_to_extraction_queue(self, stories_id: int) -> None:
+
+        log.info(f"Adding an extraction job for story {stories_id}...")
+
+        job_broker = JobBroker(
+            queue_name='MediaWords::Job::ExtractAndVector',
+            rabbitmq_config=RabbitMQConfig(
+
+                # Keep RabbitMQ's timeout smaller than the action's "start_to_close_timeout"
+                timeout=60,
+
+                # Disable retries as Temporal will be the one that does all the retrying
+                retries=None,
+            ),
+        )
+
+        # add_to_queue() is not idempotent but it's not a big deal to extract a single story twice
+        job_broker.add_to_queue(stories_id=stories_id)
+
+        log.info(f"Done adding an extraction job for story {stories_id}")
+
+
+class PodcastTranscribeWorkflowImpl(PodcastTranscribeWorkflow):
+    """Workflow implementation: awaits the transcription activities for a single story, one after another."""
+
+    def __init__(self):
+        self.activities: PodcastTranscribeActivities = Workflow.new_activity_stub(
+            activities_cls=PodcastTranscribeActivities,
+            # No retry_parameters here as they get set individually in @activity_method()
+        )
+
+    async def transcribe_episode(self, stories_id: int) -> None:
+        """Transcribe the podcast episode of a story and then queue the story for extraction."""
+        bcp47_language_code = await self.activities.identify_story_bcp47_language_code(stories_id)
+        if bcp47_language_code is None:
+            # Default to English in case there wasn't enough sizable text in title / description to make a good guess
+            bcp47_language_code = 'en'
+
+        enclosure = await self.activities.determine_best_enclosure(stories_id)
+        if not enclosure:
+            raise McPermanentError(f"No viable enclosure found for story {stories_id}")
+
+        await self.activities.fetch_enclosure_to_gcs(stories_id, enclosure)
+
+        episode_metadata_dict = await self.activities.fetch_transcode_store_episode(stories_id)
+
+        episode_metadata = MediaFileInfoAudioStream.from_dict(episode_metadata_dict)
+
+        max_duration = PodcastTranscribeEpisodeConfig().max_duration()
+        if episode_metadata.duration > max_duration:
+            raise McPermanentError(
+                f"Episode's duration ({episode_metadata.duration} s) exceeds max. duration ({max_duration} s)"
+            )
+
+        speech_operation_id = await self.activities.submit_transcribe_operation(
+            stories_id,
+            episode_metadata_dict,
+            bcp47_language_code,
+        )
+
+        # Wait for Google Speech API to finish up transcribing (sleep for the episode's duration plus 10%)
+        await Workflow.sleep(int(episode_metadata.duration * 1.1))
+
+        await self.activities.fetch_store_raw_transcript_json(stories_id, speech_operation_id)
+
+        await self.activities.fetch_store_transcript(stories_id)
+
+        await self.activities.add_to_extraction_queue(stories_id)
diff --git a/apps/podcast-transcribe-episode/src/python/podcast_transcribe_episode/workflow_interface.py b/apps/podcast-transcribe-episode/src/python/podcast_transcribe_episode/workflow_interface.py
new file mode 100644
index 0000000000..b44ab9694a
--- /dev/null
+++ b/apps/podcast-transcribe-episode/src/python/podcast_transcribe_episode/workflow_interface.py
@@ -0,0 +1,220 @@
+import dataclasses
+from datetime import timedelta
+from typing import Optional
+
+# noinspection PyPackageRequirements
+from temporal.activity_method import activity_method, RetryParameters
+# noinspection PyPackageRequirements
+from temporal.workflow import workflow_method
+
+from mediawords.workflow.exceptions import McPermanentError
+
+from .config import PodcastTranscribeEpisodeConfig
+from .enclosure import StoryEnclosureDict
+from .media_info import MediaFileInfoAudioStreamDict
+
+TASK_QUEUE = "podcast-transcribe-episode"
+"""Temporal task queue."""
+
+DEFAULT_RETRY_PARAMETERS = RetryParameters(
+    initial_interval=timedelta(seconds=1),
+    backoff_coefficient=2,
+    maximum_interval=timedelta(hours=2),
+    maximum_attempts=1000,
+    non_retryable_error_types=[
+        McPermanentError.__name__,
+    ],
+)
+
+
+class PodcastTranscribeActivities(object):
+    """Activities interface: declares Temporal options and signatures; activity methods raise NotImplementedError."""
+    @classmethod
+    def _create_config(cls) -> PodcastTranscribeEpisodeConfig:
+        """
+        Create and return configuration instance to be used for running the workflow.
+
+        Might get overridden in case some configuration changes have to be made while running the tests.
+        """
+        return PodcastTranscribeEpisodeConfig()
+
+    def __init__(self):
+        super().__init__()
+        self.config = self._create_config()
+
+    @activity_method(
+        task_queue=TASK_QUEUE,
+        start_to_close_timeout=timedelta(seconds=60),
+        retry_parameters=DEFAULT_RETRY_PARAMETERS,
+    )
+    async def identify_story_bcp47_language_code(self, stories_id: int) -> Optional[str]:
+        """
+        Guess BCP 47 language code of a story.
+
+        https://cloud.google.com/speech-to-text/docs/languages
+
+        :param stories_id: Story to guess the language code for.
+        :return: BCP 47 language code (e.g. 'en-US') or None if the language code could not be determined.
+        """
+        raise NotImplementedError
+
+    @activity_method(
+        task_queue=TASK_QUEUE,
+        start_to_close_timeout=timedelta(seconds=60),
+        retry_parameters=DEFAULT_RETRY_PARAMETERS,
+    )
+    async def determine_best_enclosure(self, stories_id: int) -> Optional[StoryEnclosureDict]:
+        """
+        Fetch a list of story enclosures, determine which one looks like a podcast episode the most.
+
+        Uses <enclosure /> or similar tag.
+
+        :param stories_id: Story to fetch the enclosures for.
+        :return: Best enclosure metadata object (as dict), or None if no best enclosure could be determined.
+        """
+        raise NotImplementedError
+
+    @activity_method(
+        task_queue=TASK_QUEUE,
+        # With a super-slow server, it's probably reasonable to expect that it might take a few hours to fetch a single
+        # episode
+        start_to_close_timeout=timedelta(hours=2),
+        retry_parameters=dataclasses.replace(
+            DEFAULT_RETRY_PARAMETERS,
+
+            # Wait for a minute before trying again
+            initial_interval=timedelta(minutes=1),
+
+            # Hope for the server to resurrect in a week
+            maximum_interval=timedelta(weeks=1),
+
+            # Don't kill ourselves trying to hit a permanently dead server
+            maximum_attempts=50,
+        ),
+    )
+    async def fetch_enclosure_to_gcs(self, stories_id: int, enclosure: StoryEnclosureDict) -> None:
+        """
+        Fetch enclosure and store it to GCS as an episode.
+
+        Doesn't do transcoding or anything because transcoding or any subsequent steps might fail, and if they do, we
+        want to have the raw episode fetched and safely stored somewhere.
+
+        :param stories_id: Story to fetch the enclosure for.
+        :param enclosure: Enclosure to fetch (as dict).
+        """
+        raise NotImplementedError
+
+    @activity_method(
+        task_queue=TASK_QUEUE,
+
+        # Let's expect super long episodes or super slow servers
+        start_to_close_timeout=timedelta(hours=2),
+
+        retry_parameters=dataclasses.replace(
+            DEFAULT_RETRY_PARAMETERS,
+
+            # Wait for a minute before trying again (GCS might be down)
+            initial_interval=timedelta(minutes=1),
+
+            # Hope for GCS to resurrect in a day
+            maximum_interval=timedelta(days=1),
+
+            # Limit attempts because transcoding itself might be broken, and we don't want to be fetching huge objects
+            # from GCS periodically
+            maximum_attempts=20,
+        ),
+    )
+    async def fetch_transcode_store_episode(self, stories_id: int) -> MediaFileInfoAudioStreamDict:
+        """
+        Fetch episode from GCS, transcode it if needed and store it to GCS again in a separate bucket.
+
+        Now that the raw episode file is safely located in GCS, we can try transcoding it.
+
+        :param stories_id: Story ID the episode of which should be transcoded.
+        :return: Metadata of the best audio stream determined as part of the transcoding (as dict).
+        """
+        raise NotImplementedError
+
+    @activity_method(
+        task_queue=TASK_QUEUE,
+
+        # Give a bit more time as the implementation is likely to do some non-Temporal retries on weird Speech API
+        # errors
+        start_to_close_timeout=timedelta(minutes=5),
+
+        retry_parameters=dataclasses.replace(
+            DEFAULT_RETRY_PARAMETERS,
+
+            # Given that the thing is costly, wait a whole hour before retrying anything
+            initial_interval=timedelta(hours=1),
+
+            # Hope for the Speech API to resurrect in a week
+            maximum_interval=timedelta(weeks=1),
+
+            # Don't retry too much as each try is potentially very costly
+            maximum_attempts=10,
+        ),
+    )
+    async def submit_transcribe_operation(self,
+                                          stories_id: int,
+                                          episode_metadata: MediaFileInfoAudioStreamDict,
+                                          bcp47_language_code: str) -> str:
+        """
+        Submit a long-running transcription operation to the Speech API.
+
+        :param stories_id: Story ID of the episode which should be submitted for transcribing.
+        :param episode_metadata: Metadata of transcoded episode (as dict).
+        :param bcp47_language_code: BCP 47 language code of the story.
+        :return: Speech API operation ID for the transcription operation.
+        """
+        raise NotImplementedError
+
+    @activity_method(
+        task_queue=TASK_QUEUE,
+        start_to_close_timeout=timedelta(seconds=60),
+        retry_parameters=DEFAULT_RETRY_PARAMETERS,
+    )
+    async def fetch_store_raw_transcript_json(self, stories_id: int, speech_operation_id: str) -> None:
+        """
+        Fetch a finished transcription and store the raw JSON of it into a GCS bucket.
+
+        Raises an exception if the transcription operation is not finished yet.
+
+        :param stories_id: Story ID the episode transcript of which should be fetched and stored.
+        :param speech_operation_id: Speech API operation ID.
+        """
+        raise NotImplementedError
+
+    @activity_method(
+        task_queue=TASK_QUEUE,
+        start_to_close_timeout=timedelta(seconds=60),
+        retry_parameters=DEFAULT_RETRY_PARAMETERS,
+    )
+    async def fetch_store_transcript(self, stories_id: int) -> None:
+        """
+        Fetch a raw JSON transcript from a GCS bucket, store its text as a raw download (to be extracted later).
+
+        :param stories_id: Story ID the transcript of which should be stored into the database.
+        """
+        raise NotImplementedError
+
+    @activity_method(
+        task_queue=TASK_QUEUE,
+        start_to_close_timeout=timedelta(minutes=2),
+        retry_parameters=DEFAULT_RETRY_PARAMETERS,
+    )
+    async def add_to_extraction_queue(self, stories_id: int) -> None:
+        """
+        Add a story to the extraction queue.
+
+        :param stories_id: Story ID to be added to the extraction queue.
+        """
+        raise NotImplementedError
+
+
+class PodcastTranscribeWorkflow(object):
+    """Workflow interface; transcribe_episode() only declares the workflow method and raises NotImplementedError."""
+
+    @workflow_method(task_queue=TASK_QUEUE)
+    async def transcribe_episode(self, stories_id: int) -> None:
+        raise NotImplementedError
diff --git a/apps/podcast-transcribe-episode/src/requirements.txt b/apps/podcast-transcribe-episode/src/requirements.txt
new file mode 100644
index 0000000000..cdd9a7b8bd
--- /dev/null
+++ b/apps/podcast-transcribe-episode/src/requirements.txt
@@ -0,0 +1,3 @@
+ffmpeg-python==0.2.0
+google-cloud-speech==2.3.0
+google-cloud-storage==1.38.0
diff --git a/apps/podcast-fetch-episode/tests/data/media-samples b/apps/podcast-transcribe-episode/tests/data/media-samples
similarity index 100%
rename from apps/podcast-fetch-episode/tests/data/media-samples
rename to apps/podcast-transcribe-episode/tests/data/media-samples
diff --git a/apps/podcast-poll-due-operations/src/python/podcast_poll_due_operations/__init__.py b/apps/podcast-transcribe-episode/tests/python/__init__.py
similarity index 100%
rename from apps/podcast-poll-due-operations/src/python/podcast_poll_due_operations/__init__.py
rename to apps/podcast-transcribe-episode/tests/python/__init__.py
diff --git a/apps/podcast-transcribe-episode/tests/python/random_gcs_prefix.py b/apps/podcast-transcribe-episode/tests/python/random_gcs_prefix.py
new file mode 100644
index 0000000000..1ffc4e59c9
--- /dev/null
+++ b/apps/podcast-transcribe-episode/tests/python/random_gcs_prefix.py
@@ -0,0 +1,16 @@
+import datetime
+
+from mediawords.util.text import random_string
+
+
+def random_gcs_path_prefix() -> str:
+    """
+    Generates a random path prefix to store the objects at.
+
+    Makes it easier to debug what gets written to GCS and get rid of said objects afterwards.
+    """
+
+    date = datetime.datetime.utcnow().isoformat()
+    date = date.replace(':', '_')
+    prefix = f'tests-{date}-{random_string(length=32)}'
+    return prefix
diff --git a/apps/podcast-fetch-episode/tests/python/test_bcp47_lang.py b/apps/podcast-transcribe-episode/tests/python/test_bcp47_lang.py
similarity index 85%
rename from apps/podcast-fetch-episode/tests/python/test_bcp47_lang.py
rename to apps/podcast-transcribe-episode/tests/python/test_bcp47_lang.py
index ce52f78140..a1f9604162 100644
--- a/apps/podcast-fetch-episode/tests/python/test_bcp47_lang.py
+++ b/apps/podcast-transcribe-episode/tests/python/test_bcp47_lang.py
@@ -1,5 +1,5 @@
 # noinspection PyProtectedMember
-from podcast_fetch_episode.bcp47_lang import _country_tld_from_url, iso_639_1_code_to_bcp_47_identifier
+from podcast_transcribe_episode.bcp47_lang import _country_tld_from_url, iso_639_1_code_to_bcp_47_identifier
 
 
 def test_country_tld_from_url():
diff --git a/apps/podcast-fetch-episode/tests/python/test_enclosure.py b/apps/podcast-transcribe-episode/tests/python/test_enclosure.py
similarity index 94%
rename from apps/podcast-fetch-episode/tests/python/test_enclosure.py
rename to apps/podcast-transcribe-episode/tests/python/test_enclosure.py
index 76813bd367..5d3d1be33b 100644
--- a/apps/podcast-fetch-episode/tests/python/test_enclosure.py
+++ b/apps/podcast-transcribe-episode/tests/python/test_enclosure.py
@@ -9,7 +9,7 @@
     create_test_story,
 )
 
-from podcast_fetch_episode.enclosure import podcast_viable_enclosure_for_story, StoryEnclosure
+from podcast_transcribe_episode.enclosure import viable_story_enclosure, StoryEnclosure
 
 
 @dataclasses.dataclass
@@ -43,7 +43,7 @@ def test_no_enclosures(self):
             )
         )
 
-        assert podcast_viable_enclosure_for_story(
+        assert viable_story_enclosure(
             db=self._DB,
             stories_id=no_enclosures.stories_id,
         ) is None, "Story with no enclosures."
@@ -66,7 +66,7 @@ def test_enclosure_with_empty_url(self):
             })
         )
 
-        assert podcast_viable_enclosure_for_story(
+        assert viable_story_enclosure(
             db=self._DB,
             stories_id=enclosure_with_empty_url.stories_id,
         ) is None, "Story with an empty enclosure URL."
@@ -89,7 +89,7 @@ def test_single_mp3_enclosure(self):
             })
         )
 
-        assert podcast_viable_enclosure_for_story(
+        assert viable_story_enclosure(
             db=self._DB,
             stories_id=single_mp3_enclosure.stories_id,
         ) == StoryEnclosure.from_db_row(single_mp3_enclosure.enclosures[0]), (
@@ -114,7 +114,7 @@ def test_single_mp3_without_mime_enclosure(self):
             })
         )
 
-        assert podcast_viable_enclosure_for_story(
+        assert viable_story_enclosure(
             db=self._DB,
             stories_id=single_mp3_without_mime_enclosure.stories_id,
         ) == StoryEnclosure.from_db_row(single_mp3_without_mime_enclosure.enclosures[0]), (
@@ -145,7 +145,7 @@ def test_multiple_audio_enclosures(self):
             }),
         ])
 
-        assert podcast_viable_enclosure_for_story(
+        assert viable_story_enclosure(
             db=self._DB,
             stories_id=multiple_audio_enclosures.stories_id,
         ) == StoryEnclosure.from_db_row(multiple_audio_enclosures.enclosures[1]), (
@@ -176,7 +176,7 @@ def test_multiple_unsupported_audio_enclosures(self):
             }),
         ])
 
-        assert podcast_viable_enclosure_for_story(
+        assert viable_story_enclosure(
             db=self._DB,
             stories_id=multiple_unsupported_audio_enclosures.stories_id,
         ) == StoryEnclosure.from_db_row(multiple_unsupported_audio_enclosures.enclosures[0]), (
@@ -207,7 +207,7 @@ def test_audio_and_video_enclosures(self):
             }),
         ])
 
-        assert podcast_viable_enclosure_for_story(
+        assert viable_story_enclosure(
             db=self._DB,
             stories_id=audio_and_video_enclosures.stories_id,
         ) == StoryEnclosure.from_db_row(audio_and_video_enclosures.enclosures[1]), (
@@ -238,7 +238,7 @@ def test_only_video_enclosures(self):
             }),
         ])
 
-        assert podcast_viable_enclosure_for_story(
+        assert viable_story_enclosure(
             db=self._DB,
             stories_id=only_video_enclosures.stories_id,
         ) == StoryEnclosure.from_db_row(only_video_enclosures.enclosures[0]), (
diff --git a/apps/podcast-fetch-episode/tests/python/test_fetch_url.py b/apps/podcast-transcribe-episode/tests/python/test_fetch_url.py
similarity index 92%
rename from apps/podcast-fetch-episode/tests/python/test_fetch_url.py
rename to apps/podcast-transcribe-episode/tests/python/test_fetch_url.py
index 5b546a4c7f..7106a3882c 100644
--- a/apps/podcast-fetch-episode/tests/python/test_fetch_url.py
+++ b/apps/podcast-transcribe-episode/tests/python/test_fetch_url.py
@@ -4,13 +4,14 @@
 from typing import Union
 from unittest import TestCase
 
+# noinspection PyPackageRequirements
 import pytest
 
 from mediawords.test.hash_server import HashServer
 from mediawords.util.network import random_unused_port
+from mediawords.workflow.exceptions import McPermanentError
 
-from podcast_fetch_episode.exceptions import McPodcastFileFetchFailureException
-from podcast_fetch_episode.fetch_url import fetch_big_file
+from podcast_transcribe_episode.fetch_url import fetch_big_file
 
 
 class TestFetchBigFile(TestCase):
@@ -74,6 +75,6 @@ def test_max_size(self):
 
         max_size = len(self.__mock_data) - 1000
         # Function should refuse to fetch more than {max_size} bytes
-        with pytest.raises(McPodcastFileFetchFailureException):
+        with pytest.raises(McPermanentError):
             fetch_big_file(url=self.__url, dest_file=self.__dest_file, max_size=max_size)
         assert not os.path.isfile(self.__dest_file), f"File '{self.__dest_file}' should exist after a failed download."
diff --git a/apps/podcast-fetch-episode/tests/python/test_gcs_store.py b/apps/podcast-transcribe-episode/tests/python/test_gcs_store.py
similarity index 52%
rename from apps/podcast-fetch-episode/tests/python/test_gcs_store.py
rename to apps/podcast-transcribe-episode/tests/python/test_gcs_store.py
index 71aa095811..4f732da6dc 100644
--- a/apps/podcast-fetch-episode/tests/python/test_gcs_store.py
+++ b/apps/podcast-transcribe-episode/tests/python/test_gcs_store.py
@@ -1,23 +1,31 @@
+import filecmp
 import os
 import tempfile
 from unittest import TestCase
 
+# noinspection PyPackageRequirements
 import pytest
 
-from podcast_fetch_episode.config import PodcastFetchEpisodeConfig
-from podcast_fetch_episode.exceptions import McPodcastMisconfiguredGCSException
+from mediawords.workflow.exceptions import McProgrammingError, McPermanentError
 
-from podcast_fetch_episode.gcs_store import GCSStore
+from podcast_transcribe_episode.config import RawEnclosuresGCBucketConfig
+from podcast_transcribe_episode.gcs_store import GCSStore
 
-from .config_random_gcs_prefix import RandomPathPrefixConfig
+from .random_gcs_prefix import random_gcs_path_prefix
+
+
+class _RandomPrefixBucketConfig(RawEnclosuresGCBucketConfig):
+    """Raw enclosures bucket configuration whose path prefix gets randomly generated on instantiation."""
+
+    def __init__(self):
+        super().__init__(path_prefix=random_gcs_path_prefix())
 
 
 class TestGCSStore(TestCase):
 
     def test_remote_path(self):
-
         # Empty object ID
-        with pytest.raises(McPodcastMisconfiguredGCSException):
+        with pytest.raises(McProgrammingError):
             GCSStore._remote_path(path_prefix='', object_id='')
 
         assert GCSStore._remote_path(path_prefix='', object_id='a') == 'a'
@@ -35,52 +43,34 @@ def test_remote_path(self):
 
         assert GCSStore._remote_path(path_prefix='//', object_id='//a///b//../b/c') == 'a/b/c'
 
-    def test_object_uri(self):
-        gcs = GCSStore()
-
-        # Empty object ID
-        with pytest.raises(McPodcastMisconfiguredGCSException):
-            gcs.object_uri(object_id='')
-
-        class NoPathPrefixConfig(PodcastFetchEpisodeConfig):
-
-            @staticmethod
-            def gc_storage_path_prefix() -> str:
-                return ''
-
-        config = NoPathPrefixConfig()
-        gcs = GCSStore(config=config)
-        assert gcs.object_uri(object_id='a') == f'gs://{config.gc_storage_bucket_name()}/a'
-
-        class MultiPathPrefixConfig(PodcastFetchEpisodeConfig):
-
-            @staticmethod
-            def gc_storage_path_prefix() -> str:
-                return '//foo/bar//'
-
-        config = MultiPathPrefixConfig()
-        gcs = GCSStore(config=config)
-        assert gcs.object_uri(object_id='a') == f'gs://{config.gc_storage_bucket_name()}/foo/bar/a'
-
     def test_store_exists_delete(self):
-        config = RandomPathPrefixConfig()
-        gcs = GCSStore(config=config)
+        config = _RandomPrefixBucketConfig()
+        gcs = GCSStore(bucket_config=config)
 
         object_id = 'test'
         assert gcs.object_exists(object_id=object_id) is False
 
         mock_data = os.urandom(1024 * 10)
-        temp_file = os.path.join(tempfile.mkdtemp('test'), 'test')
-        with open(temp_file, mode='wb') as f:
+        src_file = os.path.join(tempfile.mkdtemp('test'), 'src')
+        with open(src_file, mode='wb') as f:
             f.write(mock_data)
 
-        gcs.store_object(local_file_path=temp_file, object_id=object_id)
+        gcs.upload_object(local_file_path=src_file, object_id=object_id)
         assert gcs.object_exists(object_id=object_id) is True
 
         # Try storing twice
-        gcs.store_object(local_file_path=temp_file, object_id=object_id)
+        gcs.upload_object(local_file_path=src_file, object_id=object_id)
         assert gcs.object_exists(object_id=object_id) is True
 
+        dst_file = os.path.join(tempfile.mkdtemp('test'), 'dst')
+        gcs.download_object(object_id=object_id, local_file_path=dst_file)
+        assert os.path.isfile(dst_file)
+        assert filecmp.cmp(src_file, dst_file, shallow=False)
+
+        # Try downloading nonexistent file
+        with pytest.raises(McPermanentError):
+            gcs.download_object(object_id='999999', local_file_path=os.path.join(tempfile.mkdtemp('test'), 'foo'))
+
         gcs.delete_object(object_id=object_id)
         assert gcs.object_exists(object_id=object_id) is False
 
diff --git a/apps/podcast-fetch-episode/tests/python/test_media_file.py b/apps/podcast-transcribe-episode/tests/python/test_media_file.py
similarity index 71%
rename from apps/podcast-fetch-episode/tests/python/test_media_file.py
rename to apps/podcast-transcribe-episode/tests/python/test_media_file.py
index 8ca91429fe..17aca0d29a 100644
--- a/apps/podcast-fetch-episode/tests/python/test_media_file.py
+++ b/apps/podcast-transcribe-episode/tests/python/test_media_file.py
@@ -1,17 +1,16 @@
 import hashlib
 import inspect
 import os
+import tempfile
 
+# noinspection PyPackageRequirements
 import pytest
 
-from podcast_fetch_episode.audio_codecs import AbstractAudioCodec
-from podcast_fetch_episode.exceptions import McPodcastFileIsInvalidException
-from podcast_fetch_episode.media_file import (
-    MediaFileInfo,
-    media_file_info,
-    TranscodeTempDirAndFile,
-    transcode_media_file_if_needed,
-)
+from mediawords.workflow.exceptions import McPermanentError
+
+from podcast_transcribe_episode.audio_codecs import AbstractAudioCodec
+from podcast_transcribe_episode.media_info import media_file_info, MediaFileInfo
+from podcast_transcribe_episode.transcode import maybe_transcode_file
 
 MEDIA_SAMPLES_PATH = '/opt/mediacloud/tests/data/media-samples/samples/'
 assert os.path.isdir(MEDIA_SAMPLES_PATH), f"Directory with media samples '{MEDIA_SAMPLES_PATH}' should exist."
@@ -37,8 +36,7 @@ def test_media_file_info():
 
         if '-invalid' in filename:
 
-            # 
-            with pytest.raises(McPodcastFileIsInvalidException):
+            with pytest.raises(McPermanentError):
                 media_file_info(media_file_path=input_file_path)
 
         else:
@@ -79,35 +77,42 @@ def _file_sha1_hash(file_path: str) -> str:
     return sha1.hexdigest()
 
 
-def test_transcode_media_file_if_needed():
-    """Test transcode_media_if_needed()."""
-
+def test_maybe_transcode_file():
     for filename in SAMPLE_FILENAMES:
         input_file_path = os.path.join(MEDIA_SAMPLES_PATH, filename)
         assert os.path.isfile(input_file_path), f"Input file '{filename}' exists."
 
         before_sha1_hash = _file_sha1_hash(input_file_path)
 
-        input_media_file = TranscodeTempDirAndFile(temp_dir=MEDIA_SAMPLES_PATH, filename=filename)
-
         if '-noaudio' in filename:
 
             # Media file with no audio
-            with pytest.raises(McPodcastFileIsInvalidException):
-                transcode_media_file_if_needed(input_media_file=input_media_file)
+            with pytest.raises(McPermanentError):
+                maybe_transcode_file(
+                    input_file=input_file_path,
+                    maybe_output_file=os.path.join(tempfile.mkdtemp('test'), 'test'),
+                )
 
         elif '-invalid' in filename:
 
             # Invalid media file
-            with pytest.raises(McPodcastFileIsInvalidException):
-                transcode_media_file_if_needed(input_media_file=input_media_file)
+            with pytest.raises(McPermanentError):
+                maybe_transcode_file(
+                    input_file=input_file_path,
+                    maybe_output_file=os.path.join(tempfile.mkdtemp('test'), 'test'),
+                )
 
         else:
-            output_media_file = transcode_media_file_if_needed(input_media_file=input_media_file)
+            maybe_output_file = os.path.join(tempfile.mkdtemp('test'), 'test')
 
-            assert output_media_file, f"Output media file was set for filename '{filename}'."
+            media_file_transcoded = maybe_transcode_file(
+                input_file=input_file_path,
+                maybe_output_file=maybe_output_file,
+            )
 
-            output_file_info = media_file_info(media_file_path=output_media_file.temp_full_path)
+            output_file_info = media_file_info(
+                media_file_path=maybe_output_file if media_file_transcoded else input_file_path,
+            )
 
             assert not output_file_info.has_video_streams, f"There should be no video streams in '{filename}'."
             assert len(output_file_info.audio_streams) == 1, f"There should be only one audio stream in '{filename}'."
@@ -122,13 +127,11 @@ def test_transcode_media_file_if_needed():
             assert audio_stream.audio_channel_count == 1, f"Output file should be only mono for filename '{filename}'."
 
             if '-mp3-mono' in filename:
-                assert (
-                        output_media_file.temp_full_path == input_media_file.temp_full_path
-                ), "Mono MP3 file shouldn't have been transcoded."
+                assert media_file_transcoded is False, "Mono MP3 file shouldn't have been transcoded."
+                assert not os.path.isfile(maybe_output_file), "Output file should not exist."
             else:
-                assert (
-                        output_media_file.temp_full_path != input_media_file.temp_full_path
-                ), f"File '{filename}' should have been transcoded."
+                assert media_file_transcoded is True, f"File '{filename}' should have been transcoded."
+                assert os.path.isfile(maybe_output_file), "Output file should exist."
 
         after_sha1_hash = _file_sha1_hash(input_file_path)
 
diff --git a/apps/podcast-transcribe-episode/tests/python/test_workflow.py b/apps/podcast-transcribe-episode/tests/python/test_workflow.py
new file mode 100644
index 0000000000..765375530a
--- /dev/null
+++ b/apps/podcast-transcribe-episode/tests/python/test_workflow.py
@@ -0,0 +1,183 @@
+import os
+from datetime import timedelta
+from typing import Union
+
+# noinspection PyPackageRequirements
+import pytest
+# noinspection PyPackageRequirements
+from temporal.workerfactory import WorkerFactory
+# noinspection PyPackageRequirements
+from temporal.workflow import WorkflowOptions
+
+from mediawords.db import connect_to_db
+from mediawords.dbi.downloads.store import fetch_content
+from mediawords.test.db.create import create_test_medium, create_test_feed, create_test_story
+from mediawords.test.hash_server import HashServer
+from mediawords.util.log import create_logger
+from mediawords.util.network import random_unused_port
+from mediawords.workflow.client import workflow_client
+from mediawords.workflow.worker import stop_worker_faster
+
+from podcast_transcribe_episode.config import (
+    PodcastTranscribeEpisodeConfig,
+    AbstractGCBucketConfig,
+    RawEnclosuresGCBucketConfig,
+    TranscodedEpisodesGCBucketConfig,
+    TranscriptsGCBucketConfig,
+)
+from podcast_transcribe_episode.gcs_store import GCSStore
+from podcast_transcribe_episode.workflow import PodcastTranscribeActivitiesImpl, PodcastTranscribeWorkflowImpl
+from podcast_transcribe_episode.workflow_interface import (
+    TASK_QUEUE,
+    PodcastTranscribeActivities,
+    PodcastTranscribeWorkflow,
+)
+
+from .random_gcs_prefix import random_gcs_path_prefix
+
+log = create_logger(__name__)
+
+TEST_MP3_PATH = '/opt/mediacloud/tests/data/media-samples/samples/kim_kardashian-mp3-mono.mp3'
+assert os.path.isfile(TEST_MP3_PATH), f"Test MP3 file '{TEST_MP3_PATH}' should exist."
+
+
+class _RandomPrefixesPodcastTranscribeEpisodeConfig(PodcastTranscribeEpisodeConfig):
+    """Custom configuration which uses random GCS prefixes."""
+
+    __slots__ = [
+        '__raw_enclosures_config',
+        '__transcoded_episodes_config',
+        '__transcripts_config',
+    ]
+
+    def __init__(self):
+        super().__init__()
+
+        # Create bucket config classes once so that if we call the getters again, the random prefixes don't get
+        # regenerated
+        self.__raw_enclosures_config = RawEnclosuresGCBucketConfig(path_prefix=random_gcs_path_prefix())
+        self.__transcoded_episodes_config = TranscodedEpisodesGCBucketConfig(path_prefix=random_gcs_path_prefix())
+        self.__transcripts_config = TranscriptsGCBucketConfig(path_prefix=random_gcs_path_prefix())
+
+    def raw_enclosures(self) -> AbstractGCBucketConfig:
+        return self.__raw_enclosures_config
+
+    def transcoded_episodes(self) -> AbstractGCBucketConfig:
+        return self.__transcoded_episodes_config
+
+    def transcripts(self) -> AbstractGCBucketConfig:
+        return self.__transcripts_config
+
+
+# Custom activities subclass with random bucket prefixes
+class _RandomPrefixesPodcastTranscribeActivities(PodcastTranscribeActivitiesImpl):
+
+    @classmethod
+    def _create_config(cls) -> PodcastTranscribeEpisodeConfig:
+        return _RandomPrefixesPodcastTranscribeEpisodeConfig()
+
+
+@pytest.mark.asyncio
+async def test_workflow():
+    db = connect_to_db()
+
+    test_medium = create_test_medium(db=db, label='test')
+    test_feed = create_test_feed(db=db, label='test', medium=test_medium)
+
+    # 'label' is important as it will be stored in both stories.title and stories.description, which in turn will be
+    # used to guess the probable language of the podcast episode
+    test_story = create_test_story(db=db, label='keeping up with Kardashians', feed=test_feed)
+
+    stories_id = test_story['stories_id']
+
+    with open(TEST_MP3_PATH, mode='rb') as f:
+        test_mp3_data = f.read()
+
+    # noinspection PyUnusedLocal
+    def __mp3_callback(request: HashServer.Request) -> Union[str, bytes]:
+        response = "".encode('utf-8')
+        response += "HTTP/1.0 200 OK\r\n".encode('utf-8')
+        response += "Content-Type: audio/mpeg\r\n".encode('utf-8')
+        response += f"Content-Length: {len(test_mp3_data)}\r\n".encode('utf-8')
+        response += "\r\n".encode('utf-8')
+        response += test_mp3_data
+        return response
+
+    port = random_unused_port()
+    pages = {
+        '/test.mp3': {
+            'callback': __mp3_callback,
+        }
+    }
+
+    hs = HashServer(port=port, pages=pages)
+    hs.start()
+
+    # Not localhost as this might get fetched from a remote worker
+    mp3_url = hs.page_url('/test.mp3')
+
+    db.insert(table='story_enclosures', insert_hash={
+        'stories_id': stories_id,
+        'url': mp3_url,
+        'mime_type': 'audio/mpeg',
+        'length': len(test_mp3_data),
+    })
+
+    client = workflow_client()
+
+    # Start worker
+    factory = WorkerFactory(client=client, namespace=client.namespace)
+    worker = factory.new_worker(task_queue=TASK_QUEUE)
+
+    # Use an activities implementation with random GCS prefixes set
+    activities = _RandomPrefixesPodcastTranscribeActivities()
+
+    worker.register_activities_implementation(
+        activities_instance=activities,
+        activities_cls_name=PodcastTranscribeActivities.__name__,
+    )
+    worker.register_workflow_implementation_type(impl_cls=PodcastTranscribeWorkflowImpl)
+    factory.start()
+
+    # Initialize workflow instance
+    workflow: PodcastTranscribeWorkflow = client.new_workflow_stub(
+        cls=PodcastTranscribeWorkflow,
+        workflow_options=WorkflowOptions(
+            workflow_id=str(stories_id),
+
+            # By default, if individual activities of the workflow fail, they will get restarted pretty much
+            # indefinitely, and so this test might run for days (or rather just time out on the CI). So we cap the
+            # workflow so that if it doesn't manage to complete in 5 minutes, we consider it as failed.
+            workflow_run_timeout=timedelta(minutes=5),
+
+        ),
+    )
+
+    # Wait for the workflow to complete
+    await workflow.transcribe_episode(stories_id)
+
+    downloads = db.select(table='downloads', what_to_select='*').hashes()
+    assert len(downloads) == 1
+    first_download = downloads[0]
+    assert first_download['stories_id'] == stories_id
+    assert first_download['type'] == 'content'
+    assert first_download['state'] == 'success'
+
+    download_content = fetch_content(db=db, download=first_download)
+
+    # It's what gets said in the sample MP3 file
+    assert 'Kim Kardashian' in download_content
+
+    # Initiate the worker shutdown in the background while we do the GCS cleanup so that the stop_worker_faster()
+    # doesn't have to wait that long
+    await worker.stop(background=True)
+
+    log.info("Cleaning up GCS...")
+    GCSStore(bucket_config=activities.config.raw_enclosures()).delete_object(object_id=str(stories_id))
+    GCSStore(bucket_config=activities.config.transcoded_episodes()).delete_object(object_id=str(stories_id))
+    GCSStore(bucket_config=activities.config.transcripts()).delete_object(object_id=str(stories_id))
+    log.info("Cleaned up GCS")
+
+    log.info("Stopping workers...")
+    await stop_worker_faster(worker)
+    log.info("Stopped workers")
diff --git a/apps/postgresql-base/Dockerfile b/apps/postgresql-base/Dockerfile
index 327d9ecede..272124e3d6 100644
--- a/apps/postgresql-base/Dockerfile
+++ b/apps/postgresql-base/Dockerfile
@@ -1,14 +1,84 @@
 #
-# PostgreSQL base
+# PostgreSQL base server
 #
 
-FROM gcr.io/mcback/base:latest
+FROM gcr.io/mcback/postgresql-repo-base:latest
 
-# Add Add PostgreSQL GPG key
-RUN curl -L https://www.postgresql.org/media/keys/ACCC4CF8.asc | apt-key add -
+# Install packages
+RUN \
+    #
+    # Install PostgreSQL
+    apt-get -y --no-install-recommends install \
+        postgresql-13 \
+        postgresql-client-13 \
+        postgresql-contrib-13 \
+        postgresql-plperl-13 \
+    && \
+    true
 
-# Add PostgreSQL APT repository
-RUN echo "deb http://apt.postgresql.org/pub/repos/apt/ focal-pgdg main" > /etc/apt/sources.list.d/pgdg.list
+# Make some run directories
+RUN \
+    mkdir -p /var/run/postgresql/13-main.pg_stat_tmp && \
+    chown -R postgres:postgres /var/run/postgresql/13-main.pg_stat_tmp && \
+    true
 
-# Fetch new repositories
-RUN apt-get -y update
+# Write our own configuration
+RUN rm -rf /etc/postgresql/13/main/
+COPY conf/ /etc/postgresql/13/main/
+
+# This is where "update_memory_config.sh" script will write its memory settings
+# which it will auto-determine from available RAM on every run.
+RUN \
+    touch /var/run/postgresql/postgresql-memory.conf && \
+    chown postgres:postgres /var/run/postgresql/postgresql-memory.conf && \
+    true
+
+# Copy helper scripts
+RUN mkdir -p /opt/postgresql-base/
+COPY bin/* /opt/postgresql-base/bin/
+
+USER postgres
+
+RUN \
+    #
+    # Remove APT-initialized data directory because it doesn't have the right
+    # locale, doesn't use checksums etc.
+    rm -rf /var/lib/postgresql/13/main/ && \
+    #
+    # Write memory configuration in case we decide to start PostgreSQL at
+    # build time; "postgresql.sh" re-runs this script on every container
+    # start
+    /opt/postgresql-base/bin/update_memory_config.sh && \
+    #
+    # Run initdb
+    mkdir -p /var/lib/postgresql/13/main/ && \
+    /usr/lib/postgresql/13/bin/initdb \
+        --pgdata=/var/lib/postgresql/13/main/ \
+        --data-checksums \
+        --encoding=UTF-8 \
+        --lc-collate='en_US.UTF-8' \
+        --lc-ctype='en_US.UTF-8' \
+    && \
+    true
+
+# VOLUME doesn't get set here as children of this image might amend the initial
+# data directory somehow (e.g. pre-initialize it with some schema). Once you do
+# that in the sub-image, don't forget to define VOLUME afterwards!
+
+# SIGTERM (Docker's default) will initiate PostgreSQL's "Smart Shutdown" mode
+# which will then wait for the current transactions to finish. If there are
+# active long-running queries, Docker will wait for "stop_grace_period", run
+# out of patience and SIGKILL the process, forcing PostgreSQL to recover the
+# database on restart.
+# So, instead we stop the database with SIGINT which triggers "Fast Shutdown":
+# active connections get terminated, and PostgreSQL shuts down considerably
+# faster and safer.
+STOPSIGNAL SIGINT
+
+# Server
+EXPOSE 5432
+
+# *Not* adding /opt/postgresql-base/ to $PATH so that users get to pick which
+# specific version of "postgresql.sh" to run
+
+CMD ["/opt/postgresql-base/bin/postgresql.sh"]
diff --git a/apps/postgresql-base/bin/postgresql.sh b/apps/postgresql-base/bin/postgresql.sh
new file mode 100755
index 0000000000..032c9e0a38
--- /dev/null
+++ b/apps/postgresql-base/bin/postgresql.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+
+set -u
+set -e
+
+MC_POSTGRESQL_BIN_DIR="/usr/lib/postgresql/13/bin/"
+MC_POSTGRESQL_DATA_DIR="/var/lib/postgresql/13/main/"
+MC_POSTGRESQL_CONF_PATH="/etc/postgresql/13/main/postgresql.conf"
+
+# Update memory configuration
+/opt/postgresql-base/bin/update_memory_config.sh
+
+# Start PostgreSQL
+exec "${MC_POSTGRESQL_BIN_DIR}/postgres" \
+    -D "${MC_POSTGRESQL_DATA_DIR}" \
+    -c "config_file=${MC_POSTGRESQL_CONF_PATH}"
diff --git a/apps/postgresql-server/bin/update_memory_config.sh b/apps/postgresql-base/bin/update_memory_config.sh
similarity index 83%
rename from apps/postgresql-server/bin/update_memory_config.sh
rename to apps/postgresql-base/bin/update_memory_config.sh
index e60bbbc0dc..a0456d51c0 100755
--- a/apps/postgresql-server/bin/update_memory_config.sh
+++ b/apps/postgresql-base/bin/update_memory_config.sh
@@ -3,8 +3,6 @@
 set -u
 set -e
 
-MC_POSTGRESQL_BIN_DIR="/usr/lib/postgresql/11/bin/"
-MC_POSTGRESQL_DATA_DIR="/var/lib/postgresql/11/main/"
 MC_POSTGRESQL_MEMORY_CONF_PATH="/var/run/postgresql/postgresql-memory.conf"
 
 # Adjust configuration based on amount of RAM
diff --git a/apps/postgresql-server/conf/environment b/apps/postgresql-base/conf/environment
similarity index 100%
rename from apps/postgresql-server/conf/environment
rename to apps/postgresql-base/conf/environment
diff --git a/apps/postgresql-server/conf/pg_ctl.conf b/apps/postgresql-base/conf/pg_ctl.conf
similarity index 100%
rename from apps/postgresql-server/conf/pg_ctl.conf
rename to apps/postgresql-base/conf/pg_ctl.conf
diff --git a/apps/postgresql-server/conf/pg_hba.conf b/apps/postgresql-base/conf/pg_hba.conf
similarity index 89%
rename from apps/postgresql-server/conf/pg_hba.conf
rename to apps/postgresql-base/conf/pg_hba.conf
index d734d1ffe9..1338cb83eb 100644
--- a/apps/postgresql-server/conf/pg_hba.conf
+++ b/apps/postgresql-base/conf/pg_hba.conf
@@ -6,4 +6,4 @@ host    all             all             ::1/128                 md5
 local   replication     all                                     peer
 host    replication     all             127.0.0.1/32            md5
 host    replication     all             ::1/128                 md5
-host    all             mediacloud      samenet                 md5
+host    all             all             samenet                 md5
diff --git a/apps/postgresql-server/conf/pg_ident.conf b/apps/postgresql-base/conf/pg_ident.conf
similarity index 100%
rename from apps/postgresql-server/conf/pg_ident.conf
rename to apps/postgresql-base/conf/pg_ident.conf
diff --git a/apps/postgresql-server/conf/postgresql.conf b/apps/postgresql-base/conf/postgresql.conf
similarity index 85%
rename from apps/postgresql-server/conf/postgresql.conf
rename to apps/postgresql-base/conf/postgresql.conf
index 8170f7bc85..cbd4c22669 100644
--- a/apps/postgresql-server/conf/postgresql.conf
+++ b/apps/postgresql-base/conf/postgresql.conf
@@ -2,10 +2,10 @@
 # Media Cloud PostgreSQL static configuration
 #
 
-data_directory = '/var/lib/postgresql/11/main'
-hba_file = '/etc/postgresql/11/main/pg_hba.conf'
-ident_file = '/etc/postgresql/11/main/pg_ident.conf'
-external_pid_file = '/var/run/postgresql/11-main.pid'
+data_directory = '/var/lib/postgresql/13/main'
+hba_file = '/etc/postgresql/13/main/pg_hba.conf'
+ident_file = '/etc/postgresql/13/main/pg_ident.conf'
+external_pid_file = '/var/run/postgresql/13-main.pid'
 
 port = 5432
 max_connections = 610
@@ -38,13 +38,13 @@ hot_standby_feedback = on
 
 random_page_cost = 1.0
 
-cluster_name = '11/main'
+cluster_name = '13/main'
 
 log_line_prefix = '%t [%p-%l] %q%u@%d '
 log_timezone = 'localtime'
 log_lock_waits = on
 
-stats_temp_directory = '/var/run/postgresql/11-main.pg_stat_tmp'
+stats_temp_directory = '/var/run/postgresql/13-main.pg_stat_tmp'
 
 datestyle = 'iso, mdy'
 timezone = 'localtime'
diff --git a/apps/postgresql-server/conf/start.conf b/apps/postgresql-base/conf/start.conf
similarity index 100%
rename from apps/postgresql-server/conf/start.conf
rename to apps/postgresql-base/conf/start.conf
diff --git a/apps/postgresql-pgbouncer/Dockerfile b/apps/postgresql-pgbouncer/Dockerfile
index 37d2dbc4c0..a2496d679f 100644
--- a/apps/postgresql-pgbouncer/Dockerfile
+++ b/apps/postgresql-pgbouncer/Dockerfile
@@ -2,7 +2,7 @@
 # PgBouncer
 #
 
-FROM gcr.io/mcback/postgresql-base:latest
+FROM gcr.io/mcback/postgresql-repo-base:latest
 
 # Install PgBouncer
 RUN \
diff --git a/apps/postgresql-pgbouncer/conf/pgbouncer.ini b/apps/postgresql-pgbouncer/conf/pgbouncer.ini
index cd6760882b..eb3f28662c 100644
--- a/apps/postgresql-pgbouncer/conf/pgbouncer.ini
+++ b/apps/postgresql-pgbouncer/conf/pgbouncer.ini
@@ -16,13 +16,26 @@ auth_file = /etc/pgbouncer/userlist.txt
 
 pool_mode = session
 server_reset_query = DISCARD ALL
-max_client_conn = 600
-default_pool_size = 600
+
+# Maximum number of client connections allowed
+max_client_conn = 5000
+
+# How many server connections to allow per user/database pair
+default_pool_size = 450
+
+# Do not allow more than this many server connections per database (regardless
+# of user)
+max_db_connections = 500
+
 log_connections = 0
 log_disconnections = 0
 stats_period = 600
 server_login_retry = 1
 
+# Don't let transactions idle around for more than 10 minutes to prevent buggy
+# code from leading to transaction wraparound issues
+idle_transaction_timeout = 600
+
 # PyCharm doesn't work without this one:
 # https://github.com/Athou/commafeed/issues/559
 ignore_startup_parameters = extra_float_digits
diff --git a/apps/podcast-poll-due-operations/.dockerignore b/apps/postgresql-repo-base/.dockerignore
similarity index 100%
rename from apps/podcast-poll-due-operations/.dockerignore
rename to apps/postgresql-repo-base/.dockerignore
diff --git a/apps/postgresql-repo-base/Dockerfile b/apps/postgresql-repo-base/Dockerfile
new file mode 100644
index 0000000000..43c9660011
--- /dev/null
+++ b/apps/postgresql-repo-base/Dockerfile
@@ -0,0 +1,19 @@
+#
+# PostgreSQL repository base
+#
+
+FROM gcr.io/mcback/base:latest
+
+RUN \
+    #
+    # Add PostgreSQL GPG key
+    curl -fsSL https://www.postgresql.org/media/keys/ACCC4CF8.asc | apt-key add - && \
+    #
+    # Add PostgreSQL APT repository
+    echo "deb http://apt.postgresql.org/pub/repos/apt/ focal-pgdg main" \
+        > /etc/apt/sources.list.d/pgdg.list && \
+    #
+    # Fetch new repositories
+    apt-get -y update && \
+    #
+    true
diff --git a/apps/postgresql-server/Dockerfile b/apps/postgresql-server/Dockerfile
index 25a8308803..1d8dc5ab18 100644
--- a/apps/postgresql-server/Dockerfile
+++ b/apps/postgresql-server/Dockerfile
@@ -1,19 +1,14 @@
 #
-# PostgreSQL server
+# Main backend PostgreSQL server
 #
 
 FROM gcr.io/mcback/postgresql-base:latest
 
-# Install packages
+USER root
 RUN \
-    apt-get -y update && \
-    #
-    # Install PostgreSQL
-    apt-get -y --no-install-recommends install \
-        postgresql-11 \
-        postgresql-client-11 \
-        postgresql-contrib-11 \
-        postgresql-plperl-11 \
+    mkdir -p \
+        /opt/postgresql-server/bin/ \
+        /opt/postgresql-server/schema/ \
     && \
     apt-get -y --no-install-recommends install python3 python3-pip python3-setuptools && \
     #
@@ -36,22 +31,9 @@ RUN \
     #
     true
 
-# Make some run directories
-RUN \
-    mkdir -p /var/run/postgresql/11-main.pg_stat_tmp && \
-    chown -R postgres:postgres /var/run/postgresql/11-main.pg_stat_tmp && \
-    true
-
-# Write our own configuration
-RUN rm -rf /etc/postgresql/11/main/
-COPY conf/ /etc/postgresql/11/main/
-
-# This is where "update_memory_config.sh" script will write its memory settings
-# which it will auto-determine from available RAM on every run.
-RUN \
-    touch /var/run/postgresql/postgresql-memory.conf && \
-    chown postgres:postgres /var/run/postgresql/postgresql-memory.conf && \
-    true
+# Copy helper scripts, schema, migrations
+COPY bin/* /opt/postgresql-server/bin/
+COPY schema/ /opt/postgresql-server/schema/
 
 # Copy helper scripts, schema, migrations, pgmigrate callbacks/config
 RUN mkdir -p /opt/mediacloud/
@@ -61,15 +43,14 @@ COPY migrations/ /opt/mediacloud/migrations/
 COPY migrations.yml /opt/mediacloud/migrations.yml
 RUN cd /opt/mediacloud
 
-USER postgres
-
 # Initialize data volume, create users + database
 # If a new empty volume gets mounted to /var/lib/postgresql/ upon
 # container start, Docker will copy the files from the container to the volume
+USER postgres
 RUN /opt/mediacloud/bin/initialize_db.sh
 
 ENV \
-    PATH="/opt/mediacloud/bin:${PATH}" \
+    PATH="/opt/postgresql-server/bin:${PATH}" \
     #
     # Make sure that we can connect via "psql" without sudoing into "postgres" user
     PGHOST=localhost \
@@ -78,26 +59,8 @@ ENV \
     PGPASSWORD=mediacloud \
     PGDATABASE=mediacloud
 
-# Remove the init script so that someone doesn't accidentally run it in production
-USER root
-RUN rm /opt/mediacloud/bin/initialize_db.sh
-
-USER postgres
-
 # PostgreSQL data
 VOLUME /var/lib/postgresql/
 
-# SIGTERM (Docker's default) will initiate PostgreSQL's "Smart Shutdown" mode
-# which will then wait for the current transactions to finish. If there are
-# active long-running queries, Docker will wait for "stop_grace_period", run
-# out of patience and SIGKILL the process, forcing PostgreSQL to recover the
-# database on restart.
-# So, instead we stop the database with SIGINT which triggers "Fast Shutdown":
-# active connections get terminated, and PostgreSQL shuts down considerably
-# faster and safer.
-STOPSIGNAL SIGINT
-
-# Server
-EXPOSE 5432
-
-CMD ["/opt/mediacloud/bin/postgresql_server.sh"]
+# Use our own wrapper script which runs schema upgrades first
+CMD ["/opt/postgresql-server/bin/postgresql.sh"]
diff --git a/apps/postgresql-server/bin/apply_migrations.sh b/apps/postgresql-server/bin/apply_migrations.sh
index 25b5e15dca..371c80f101 100755
--- a/apps/postgresql-server/bin/apply_migrations.sh
+++ b/apps/postgresql-server/bin/apply_migrations.sh
@@ -3,9 +3,9 @@
 set -u
 set -e
 
-MC_POSTGRESQL_BIN_DIR="/usr/lib/postgresql/11/bin/"
-MC_POSTGRESQL_DATA_DIR="/var/lib/postgresql/11/main/"
-MC_POSTGRESQL_CONF_PATH="/etc/postgresql/11/main/postgresql.conf"
+MC_POSTGRESQL_BIN_DIR="/usr/lib/postgresql/13/bin/"
+MC_POSTGRESQL_DATA_DIR="/var/lib/postgresql/13/main/"
+MC_POSTGRESQL_CONF_PATH="/etc/postgresql/13/main/postgresql.conf"
 
 MIGRATIONS_DIR="/opt/mediacloud/migrations"
 
diff --git a/apps/postgresql-server/bin/initialize_db.sh b/apps/postgresql-server/bin/initialize_db.sh
index 12ce297523..08967dcd50 100755
--- a/apps/postgresql-server/bin/initialize_db.sh
+++ b/apps/postgresql-server/bin/initialize_db.sh
@@ -3,25 +3,12 @@
 set -u
 set -e
 
-MC_POSTGRESQL_BIN_DIR="/usr/lib/postgresql/11/bin/"
-MC_POSTGRESQL_DATA_DIR="/var/lib/postgresql/11/main/"
-MC_POSTGRESQL_CONF_PATH="/etc/postgresql/11/main/postgresql.conf"
+MC_POSTGRESQL_BIN_DIR="/usr/lib/postgresql/13/bin/"
+MC_POSTGRESQL_DATA_DIR="/var/lib/postgresql/13/main/"
+MC_POSTGRESQL_CONF_PATH="/etc/postgresql/13/main/postgresql.conf"
 
 # Update memory configuration
-/opt/mediacloud/bin/update_memory_config.sh
-
-# Remove APT-initialized data directory because it doesn't have the right
-# locale, doesn't use checksums etc.
-rm -rf /var/lib/postgresql/11/main/
-
-# Run initdb
-mkdir -p "${MC_POSTGRESQL_DATA_DIR}"
-"${MC_POSTGRESQL_BIN_DIR}/initdb" \
-    --pgdata="${MC_POSTGRESQL_DATA_DIR}" \
-    --data-checksums \
-    --encoding=UTF-8 \
-    --lc-collate='en_US.UTF-8' \
-    --lc-ctype='en_US.UTF-8'
+/opt/postgresql-base/bin/update_memory_config.sh
 
 "${MC_POSTGRESQL_BIN_DIR}/pg_ctl" \
     -o "-c config_file=${MC_POSTGRESQL_CONF_PATH}" \
@@ -48,11 +35,11 @@ CREATE DATABASE mediacloud WITH
 EOF
 psql -v ON_ERROR_STOP=1 -c "${CREATE_DB_SQL}"
 
-# run migrations with pgmigrate package
+# Run migrations with pgmigrate package
 cd /opt/mediacloud && pgmigrate -t latest migrate
 
-# # dump schema file for reference in development
-psql mediacloud -c '\! pg_dump mediacloud > /tmp/mediawords.sql'
+# Dump schema file for reference in development
+psql -v ON_ERROR_STOP=1 mediacloud -c '\! pg_dump mediacloud > /tmp/mediawords.sql'
 
 # Stop PostgreSQL
 "${MC_POSTGRESQL_BIN_DIR}/pg_ctl" \
diff --git a/apps/postgresql-server/bin/postgresql_server.sh b/apps/postgresql-server/bin/postgresql.sh
similarity index 58%
rename from apps/postgresql-server/bin/postgresql_server.sh
rename to apps/postgresql-server/bin/postgresql.sh
index 50661ff1dd..cf7e7c5c57 100755
--- a/apps/postgresql-server/bin/postgresql_server.sh
+++ b/apps/postgresql-server/bin/postgresql.sh
@@ -3,12 +3,8 @@
 set -u
 set -e
 
-MC_POSTGRESQL_BIN_DIR="/usr/lib/postgresql/11/bin/"
-MC_POSTGRESQL_DATA_DIR="/var/lib/postgresql/11/main/"
-MC_POSTGRESQL_CONF_PATH="/etc/postgresql/11/main/postgresql.conf"
-
 # Update memory configuration
-/opt/mediacloud/bin/update_memory_config.sh
+/opt/postgresql-base/bin/update_memory_config.sh
 
 # Run schema migrations if needed
 if [ -e /var/lib/postgresql/first_run ]; then
@@ -19,11 +15,9 @@ elif [ ! -z ${MC_POSTGRESQL_SKIP_MIGRATIONS+x} ]; then
     echo "Skipping schema migrations because 'MC_POSTGRESQL_SKIP_MIGRATIONS' is set."
 else
     echo "Applying schema migrations..."
-    /opt/mediacloud/bin/apply_migrations.sh
+    /opt/postgresql-server/bin/apply_migrations.sh
     echo "Done applying schema migrations."
 fi
 
 # Start PostgreSQL
-exec "${MC_POSTGRESQL_BIN_DIR}/postgres" \
-    -D "${MC_POSTGRESQL_DATA_DIR}" \
-    -c "config_file=${MC_POSTGRESQL_CONF_PATH}"
+exec /opt/postgresql-base/bin/postgresql.sh
diff --git a/apps/postgresql-server/bin/pps b/apps/postgresql-server/bin/pps
index f007f3dfb8..ff24e59a45 100755
--- a/apps/postgresql-server/bin/pps
+++ b/apps/postgresql-server/bin/pps
@@ -6,7 +6,26 @@ else
     COLS=`tput cols`
 fi
 
-echo "select psa.pid, min(application_name) as client, substr(query_start::text, 0, 20) as date, granted as l, regexp_replace(query, E'[\\n\\r ]+', ' ', 'g' ) q from pg_stat_activity psa left join pg_locks pl on ( psa.pid = pl.pid and pl.granted = 'f' ) where state not like 'idle%' group by psa.pid, usename, state, query_start, granted, q order by query_start desc" | psql mediacloud | cut -c 1-$COLS
-
+cat <<EOF | psql mediacloud | cut -c 1-$COLS
 
+SELECT
+	psa.pid,
+	MIN(application_name) AS client,
+	SUBSTR(query_start::text, 0, 20) AS date,
+	granted AS l,
+	REGEXP_REPLACE(query, E'[\\n\\r ]+', ' ', 'g' ) AS q
+FROM pg_stat_activity AS psa
+    LEFT JOIN pg_locks AS pl
+        ON psa.pid = pl.pid
+       AND pl.granted = 'f'
+WHERE state NOT LIKE 'idle%'
+GROUP BY
+    psa.pid,
+    usename,
+    state,
+    query_start,
+    granted,
+    q
+ORDER BY query_start DESC
 
+EOF
diff --git a/apps/postgresql-server/migrations/V0001__initial_schema.sql b/apps/postgresql-server/migrations/V0001__initial_schema.sql
index d06ab61174..96520e1878 100644
--- a/apps/postgresql-server/migrations/V0001__initial_schema.sql
+++ b/apps/postgresql-server/migrations/V0001__initial_schema.sql
@@ -26,7 +26,7 @@ CREATE OR REPLACE FUNCTION set_database_schema_version() RETURNS boolean AS $$
 DECLARE
     -- Database schema version number (same as a SVN revision number)
     -- Increase it by 1 if you make major database schema changes.
-    MEDIACLOUD_DATABASE_SCHEMA_VERSION CONSTANT INT := 4759;
+    MEDIACLOUD_DATABASE_SCHEMA_VERSION CONSTANT INT := 4761;
 BEGIN
 
     -- Update / set database schema version
@@ -1618,48 +1618,6 @@ COMMENT ON FUNCTION pop_queued_download () IS 'do this as a plpgsql function
 because it wraps it in the necessary transaction without having to know whether 
 the calling context is in a transaction';
 
--- efficiently query downloads_pending for the latest downloads_id per host.  postgres is not able to do this through
--- its normal query planning (it just does an index scan of the whole index).  this turns a query that 
--- takes ~22 seconds for a 100 million row table into one that takes ~0.25 seconds
-create or replace function get_downloads_for_queue() returns table(downloads_id bigint) as $$
-declare
-    pending_host record;
-begin
-    -- quick temp copy without dead row issues for querying in loop body below
-    create temporary table qd on commit drop as select * from queued_downloads;
-
-    create temporary table pending_downloads (downloads_id bigint) on commit drop;
-    for pending_host in
-            WITH RECURSIVE t AS (
-               (SELECT host FROM downloads_pending ORDER BY host LIMIT 1)
-               UNION ALL
-               SELECT (SELECT host FROM downloads_pending WHERE host > t.host ORDER BY host LIMIT 1)
-               FROM t
-               WHERE t.host IS NOT NULL
-               )
-            SELECT host FROM t WHERE host IS NOT NULL
-        loop
-            insert into pending_downloads
-                select dp.downloads_id
-                    from downloads_pending dp
-                        left join qd on ( dp.downloads_id = qd.downloads_id )
-                    where 
-                        host = pending_host.host and
-                        qd.downloads_id is null
-                    order by priority, downloads_id desc nulls last
-                    limit 1;
-        end loop;
-
-    return query select pd.downloads_id from pending_downloads pd;
- end;
-
-$$ language plpgsql;
-
-COMMENT ON FUNCTION get_downloads_for_queue () IS 'efficiently query downloads_pending 
-for the latest downloads_id per host.  postgres is not able to do this through its 
-normal query planning (it just does an index scan of the whole index). this turns 
-a query that  takes ~22 seconds for a 100 million row table into one that takes ~0.25 seconds';
-
 --
 -- Extracted plain text from every download
 --
@@ -4157,155 +4115,6 @@ CREATE UNIQUE INDEX story_enclosures_stories_id_url
     ON story_enclosures (stories_id, url);
 
 
---
--- Audio file codec; keep in sync with "_SUPPORTED_NATIVE_AUDIO_CODECS" constant
--- (https://cloud.google.com/speech-to-text/docs/reference/rpc/google.cloud.speech.v1p1beta1)
---
-CREATE TYPE podcast_episodes_audio_codec AS ENUM (
-    'LINEAR16',
-    'FLAC',
-    'MULAW',
-    'OGG_OPUS',
-    'MP3'
-);
-
-COMMENT ON TYPE podcast_episodes_audio_codec IS 'Audio file codec; keep in sync with "_SUPPORTED_NATIVE_AUDIO_CODECS" 
-constant (https://cloud.google.com/speech-to-text/docs/reference/rpc/google.cloud.speech.v1p1beta1)';
-
---
--- Podcast story episodes (derived from enclosures)
---
-CREATE TABLE podcast_episodes (
-    podcast_episodes_id     BIGSERIAL   PRIMARY KEY,
-    stories_id              INT         NOT NULL REFERENCES stories (stories_id) ON DELETE CASCADE,
-
-    -- Enclosure that's considered to point to a podcast episode
-    story_enclosures_id     BIGINT      NOT NULL
-                                            REFERENCES story_enclosures (story_enclosures_id)
-                                            ON DELETE CASCADE,
-
-    -- Google Cloud Storage URI where the audio file is located at
-    gcs_uri                 TEXT        NOT NULL
-                                            CONSTRAINT gcs_uri_has_gs_prefix
-                                            CHECK(gcs_uri LIKE 'gs://%'),
-
-    -- Duration (in seconds)
-    duration                INT         NOT NULL
-                                            CONSTRAINT duration_is_positive
-                                            CHECK(duration > 0),
-
-    -- Audio codec as determined by transcoder
-    codec                   podcast_episodes_audio_codec  NOT NULL,
-
-    -- Audio sample rate (Hz) as determined by transcoder
-    sample_rate             INT         NOT NULL
-                                            CONSTRAINT sample_rate_looks_reasonable
-                                            CHECK(sample_rate > 1000),
-
-    -- BCP 47 language identifier
-    -- (https://cloud.google.com/speech-to-text/docs/languages)
-    bcp47_language_code     CITEXT      NOT NULL
-                                            CONSTRAINT bcp47_language_code_looks_reasonable
-                                            CHECK(
-                                                bcp47_language_code LIKE '%-%'
-                                             OR bcp47_language_code = 'zh'
-                                            ),
-
-    -- Speech API operation ID to be used for retrieving transcription; if NULL,
-    -- transcription job hasn't been submitted yet
-    speech_operation_id     TEXT        NULL
-
-);
-
-COMMENT ON TABLE podcast_episodes IS 'Podcast story episodes (derived from enclosures)';
-COMMENT ON COLUMN podcast_episodes.story_enclosures_id IS 'Enclosure that is considered 
-to point to a podcast episode';
-COMMENT ON COLUMN podcast_episodes.gcs_uri IS 'Google Cloud Storage URI where object is located';
-COMMENT ON COLUMN podcast_episodes.duration IS 'seconds';
-COMMENT ON COLUMN podcast_episodes.sample_rate IS 'Audio sample rate (Hz) as determined by transcoder';
-COMMENT ON COLUMN podcast_episodes.bcp47_language_code IS 'BCP 47 language identifier 
-(https://cloud.google.com/speech-to-text/docs/languages)';
-COMMENT ON COLUMN podcast_episodes.speech_operation_id IS 'Speech API operation ID to be used for 
-retrieving transcription; if NULL, transcription job has not been submitted yet';
-
--- Only one episode per story
-CREATE UNIQUE INDEX podcast_episodes_stories_id
-    ON podcast_episodes (stories_id);
-
-CREATE UNIQUE INDEX podcast_episodes_story_enclosures_id
-    ON podcast_episodes (story_enclosures_id);
-
-CREATE UNIQUE INDEX podcast_episodes_stories_id_story_enclosures_id
-    ON podcast_episodes (stories_id, story_enclosures_id);
-
-
--- Result of an attempt to fetch the transcript
-CREATE TYPE podcast_episode_transcript_fetch_result AS ENUM (
-
-    -- Operation was not yet finished yet at the time of fetching
-    'in_progress',
-
-    -- Operation was finished and transcription has succeeded
-    'success',
-
-    -- Operation was finished but the transcription has failed
-    'error'
-
-);
-
-
---
--- Attempts to fetch podcast episode transcript
--- (we might need to try fetching the operation's results multiple times)
---
-CREATE TABLE podcast_episode_transcript_fetches (
-    podcast_episode_transcript_fetches_id   BIGSERIAL   PRIMARY KEY,
-
-    -- Podcast that is being transcribed
-    podcast_episodes_id     BIGINT  NOT NULL
-                                        REFERENCES podcast_episodes (podcast_episodes_id)
-                                        ON DELETE CASCADE,
-
-    -- Timestamp for when a fetch job should be added to the job broker's queue the soonest
-    add_to_queue_at     TIMESTAMP WITH TIME ZONE                NOT NULL,
-
-    -- Timestamp for when a fetch job was added to the job broker's queue;
-    -- if NULL, a fetch job was never added to the queue
-    added_to_queue_at   TIMESTAMP WITH TIME ZONE                NULL,
-
-    -- Timestamp when the operation's results were attempted to be fetched by the worker;
-    -- if NULL, the results weren't attempted to be fetched yet
-    fetched_at      TIMESTAMP WITH TIME ZONE                    NULL,
-
-    -- Result of the fetch attempt;
-    -- if NULL, the operation fetch didn't happen yet
-    result          podcast_episode_transcript_fetch_result     NULL,
-
-    -- If result = 'error', error message that happened with the fetch attempt
-    error_message   TEXT                                        NULL
-
-);
-
-
--- Function that returns true if results were attempted at being fetched
-CREATE FUNCTION podcast_episode_transcript_was_added_to_queue(p_added_to_queue_at TIMESTAMP WITH TIME ZONE)
-RETURNS BOOL AS $$
-
-    SELECT CASE WHEN p_added_to_queue_at::timestamp IS NULL THEN false ELSE true END;
-
-$$ LANGUAGE SQL IMMUTABLE;
-
-
-CREATE INDEX podcast_episode_transcript_fetches_podcast_episodes_id
-    ON podcast_episode_transcript_fetches (podcast_episodes_id);
-
-CREATE UNIQUE INDEX podcast_episode_transcript_fetches_due
-    ON podcast_episode_transcript_fetches (
-        add_to_queue_at,
-        podcast_episode_transcript_was_added_to_queue(added_to_queue_at)
-    );
-
-
 --
 -- Celery job results
 -- (configured as self.__app.conf.database_table_names; schema is dictated by Celery + SQLAlchemy)
diff --git a/apps/postgresql-server/schema/migrations/mediawords-4759-4760.sql b/apps/postgresql-server/schema/migrations/mediawords-4759-4760.sql
new file mode 100644
index 0000000000..9d8fe5e135
--- /dev/null
+++ b/apps/postgresql-server/schema/migrations/mediawords-4759-4760.sql
@@ -0,0 +1,44 @@
+--
+-- This is a Media Cloud PostgreSQL schema difference file (a "diff") between schema
+-- versions 4759 and 4760.
+--
+-- If you are running Media Cloud with a database that was set up with a schema version
+-- 4759, and you would like to upgrade both the Media Cloud and the
+-- database to be at version 4760, import this SQL file:
+--
+--     psql mediacloud < mediawords-4759-4760.sql
+--
+-- You might need to import some additional schema diff files to reach the desired version.
+--
+--
+-- 1 of 2. Import the output of 'apgdiff':
+--
+
+
+DROP FUNCTION IF EXISTS get_downloads_for_queue();
+
+
+--
+-- 2 of 2. Reset the database version.
+--
+
+CREATE OR REPLACE FUNCTION set_database_schema_version() RETURNS boolean AS $$
+DECLARE
+
+    -- Database schema version number (same as a SVN revision number)
+    -- Increase it by 1 if you make major database schema changes.
+    MEDIACLOUD_DATABASE_SCHEMA_VERSION CONSTANT INT := 4760;
+
+BEGIN
+
+    -- Update / set database schema version
+    DELETE FROM database_variables WHERE name = 'database-schema-version';
+    INSERT INTO database_variables (name, value) VALUES ('database-schema-version', MEDIACLOUD_DATABASE_SCHEMA_VERSION::int);
+
+    return true;
+
+END;
+$$
+LANGUAGE 'plpgsql';
+
+SELECT set_database_schema_version();
diff --git a/apps/postgresql-server/schema/migrations/mediawords-4760-4761.sql b/apps/postgresql-server/schema/migrations/mediawords-4760-4761.sql
new file mode 100644
index 0000000000..9ae44c62a6
--- /dev/null
+++ b/apps/postgresql-server/schema/migrations/mediawords-4760-4761.sql
@@ -0,0 +1,48 @@
+--
+-- This is a Media Cloud PostgreSQL schema difference file (a "diff") between schema
+-- versions 4760 and 4761.
+--
+-- If you are running Media Cloud with a database that was set up with a schema version
+-- 4760, and you would like to upgrade both the Media Cloud and the
+-- database to be at version 4761, import this SQL file:
+--
+--     psql mediacloud < mediawords-4760-4761.sql
+--
+-- You might need to import some additional schema diff files to reach the desired version.
+--
+--
+-- 1 of 2. Import the output of 'apgdiff':
+--
+
+
+DROP TABLE podcast_episode_transcript_fetches;
+DROP TABLE podcast_episodes;
+DROP TYPE podcast_episodes_audio_codec;
+DROP TYPE podcast_episode_transcript_fetch_result;
+DROP FUNCTION podcast_episode_transcript_was_added_to_queue(TIMESTAMP WITH TIME ZONE);
+
+
+--
+-- 2 of 2. Reset the database version.
+--
+
+CREATE OR REPLACE FUNCTION set_database_schema_version() RETURNS boolean AS $$
+DECLARE
+
+    -- Database schema version number (same as a SVN revision number)
+    -- Increase it by 1 if you make major database schema changes.
+    MEDIACLOUD_DATABASE_SCHEMA_VERSION CONSTANT INT := 4761;
+
+BEGIN
+
+    -- Update / set database schema version
+    DELETE FROM database_variables WHERE name = 'database-schema-version';
+    INSERT INTO database_variables (name, value) VALUES ('database-schema-version', MEDIACLOUD_DATABASE_SCHEMA_VERSION::int);
+
+    return true;
+
+END;
+$$
+LANGUAGE 'plpgsql';
+
+SELECT set_database_schema_version();
diff --git a/apps/podcast-submit-operation/.dockerignore b/apps/postgresql-upgrade/.dockerignore
similarity index 100%
rename from apps/podcast-submit-operation/.dockerignore
rename to apps/postgresql-upgrade/.dockerignore
diff --git a/apps/postgresql-upgrade/.idea/.gitignore b/apps/postgresql-upgrade/.idea/.gitignore
new file mode 100644
index 0000000000..73f69e0958
--- /dev/null
+++ b/apps/postgresql-upgrade/.idea/.gitignore
@@ -0,0 +1,8 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
+# Editor-based HTTP Client requests
+/httpRequests/
diff --git a/apps/podcast-fetch-transcript/.idea/inspectionProfiles/profiles_settings.xml b/apps/postgresql-upgrade/.idea/inspectionProfiles/profiles_settings.xml
similarity index 100%
rename from apps/podcast-fetch-transcript/.idea/inspectionProfiles/profiles_settings.xml
rename to apps/postgresql-upgrade/.idea/inspectionProfiles/profiles_settings.xml
diff --git a/apps/postgresql-upgrade/.idea/misc.xml b/apps/postgresql-upgrade/.idea/misc.xml
new file mode 100644
index 0000000000..96297493a0
--- /dev/null
+++ b/apps/postgresql-upgrade/.idea/misc.xml
@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.8.5 Docker (gcr.io/mcback/postgresql-upgrade:latest)" project-jdk-type="Python SDK" />
+</project>
\ No newline at end of file
diff --git a/apps/podcast-fetch-episode/.idea/modules.xml b/apps/postgresql-upgrade/.idea/modules.xml
similarity index 53%
rename from apps/podcast-fetch-episode/.idea/modules.xml
rename to apps/postgresql-upgrade/.idea/modules.xml
index 1f8ef01409..36c43c68df 100644
--- a/apps/podcast-fetch-episode/.idea/modules.xml
+++ b/apps/postgresql-upgrade/.idea/modules.xml
@@ -2,7 +2,7 @@
 <project version="4">
   <component name="ProjectModuleManager">
     <modules>
-      <module fileurl="file://$PROJECT_DIR$/.idea/podcast-fetch-episode.iml" filepath="$PROJECT_DIR$/.idea/podcast-fetch-episode.iml" />
+      <module fileurl="file://$PROJECT_DIR$/.idea/postgresql-upgrade.iml" filepath="$PROJECT_DIR$/.idea/postgresql-upgrade.iml" />
     </modules>
   </component>
 </project>
\ No newline at end of file
diff --git a/apps/postgresql-upgrade/.idea/postgresql-upgrade.iml b/apps/postgresql-upgrade/.idea/postgresql-upgrade.iml
new file mode 100644
index 0000000000..f0558f493d
--- /dev/null
+++ b/apps/postgresql-upgrade/.idea/postgresql-upgrade.iml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="jdk" jdkName="Remote Python 3.8.5 Docker (gcr.io/mcback/postgresql-upgrade:latest)" jdkType="Python SDK" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>
\ No newline at end of file
diff --git a/apps/podcast-poll-due-operations/.idea/vcs.xml b/apps/postgresql-upgrade/.idea/vcs.xml
similarity index 100%
rename from apps/podcast-poll-due-operations/.idea/vcs.xml
rename to apps/postgresql-upgrade/.idea/vcs.xml
diff --git a/apps/postgresql-upgrade/Dockerfile b/apps/postgresql-upgrade/Dockerfile
new file mode 100644
index 0000000000..35cc37361f
--- /dev/null
+++ b/apps/postgresql-upgrade/Dockerfile
@@ -0,0 +1,57 @@
+#
+# PostgreSQL upgrade scripts
+#
+
+FROM gcr.io/mcback/postgresql-base:latest
+
+USER root
+
+# Install Python 3 for running the upgrade script
+RUN apt-get -y --no-install-recommends install python3
+
+# Install packages
+RUN \
+    #
+    # Install PostgreSQL 13 (oldest version)
+    apt-get -y --no-install-recommends install \
+        postgresql-13 \
+        postgresql-client-13 \
+        postgresql-contrib-13 \
+        postgresql-plperl-13 \
+    && \
+    #
+    # Install PostgreSQL 14 (newest version; currently disabled)
+    # apt-get -y --no-install-recommends install \
+    #     postgresql-14 \
+    #     postgresql-client-14 \
+    #     postgresql-contrib-14 \
+    #     postgresql-plperl-14 \
+    # && \
+    #
+    true
+
+RUN \
+    #
+    # Make some run directories
+    mkdir -p /var/run/postgres/ && \
+    chown -R postgres:postgres /var/run/postgres/ && \
+    #
+    # Remove what might have gotten created in the parent image as we won't use it
+    mkdir -p /var/lib/postgresql/ && \
+    chown -R postgres:postgres /var/lib/postgresql/ && \
+    rm -rf /var/lib/postgresql/* && \
+    #
+    # Remove extra configurations leaving only the one from parent "postgresql-base"
+    rm -rf /etc/postgresql/13/ && \
+    # rm -rf /etc/postgresql/14/ && \
+    #
+    true
+
+COPY bin/postgresql_upgrade.py /usr/bin/
+
+# This is where the volume is supposed to be mounted
+VOLUME /var/lib/postgresql/
+
+USER postgres
+
+CMD ["postgresql_upgrade.py"]
diff --git a/apps/postgresql-upgrade/bin/postgresql_upgrade.py b/apps/postgresql-upgrade/bin/postgresql_upgrade.py
new file mode 100755
index 0000000000..4f6b0a2c47
--- /dev/null
+++ b/apps/postgresql-upgrade/bin/postgresql_upgrade.py
@@ -0,0 +1,457 @@
+#!/usr/bin/env python3
+
+"""
+PostgreSQL upgrade script.
+
+Usage:
+
+time docker run -it \
+    --shm-size=64g \
+    -v ~/Downloads/postgres_11_vol/:/var/lib/postgresql/ \
+    gcr.io/mcback/postgresql-upgrade \
+    postgresql_upgrade.py --source_version=11 --target_version=12 \
+    > postgresql_upgrade.log
+"""
+
+import argparse
+import dataclasses
+import getpass
+import glob
+import logging
+import multiprocessing
+import os
+import pathlib
+import shutil
+import signal
+import subprocess
+import time
+
+logging.basicConfig(level=logging.DEBUG)
+
+
+class PostgresUpgradeError(Exception):
+    pass  # Raised by this script when one of its environment / precondition checks fails
+
+
+POSTGRES_DATA_DIR = "/var/lib/postgresql"
+POSTGRES_USER = 'postgres'
+
+
+def _dir_exists_and_accessible(directory: str) -> bool:
+    return os.path.isdir(directory) and os.access(directory, os.X_OK)  # X_OK on a directory: it can be entered
+
+
+def _ram_size_mb() -> int:
+    """Return RAM size (in megabytes) allocated to the container, parsed from /container_memory_limit.sh output."""
+    ram_size = int(subprocess.check_output(['/container_memory_limit.sh']).decode('utf-8'))
+    assert ram_size, "RAM size can't be zero."
+    return ram_size
+
+
+class _PostgresVersion(object):
+    """
+    Data object of a single PostgreSQL version to upgrade from / to.
+
+    'initdb', 'pg_upgrade' and 'vacuumdb' are only set on objects created with target_version=True.
+    """
+    __slots__ = [
+        'version',
+        'data_dir',
+        'main_dir',
+        'bin_dir',
+        'initdb',
+        'pg_upgrade',
+        'vacuumdb',
+        'postgres',
+        'tmp_conf_dir',
+        'port',
+    ]
+
+    @classmethod
+    def _current_postgresql_config_path(cls) -> str:
+        """
+        Returns path to currently present PostgreSQL configuration directory.
+
+        :return: Path to currently present PostgreSQL configuration directory, e.g. /etc/postgresql/11/main/.
+        """
+        conf_list = os.listdir('/etc/postgresql/')
+        if len(conf_list) != 1:
+            raise PostgresUpgradeError(f"More / less than one PostgreSQL configuration set has been found: {conf_list}")
+        current_version = conf_list[0]
+        if not current_version.isdecimal():
+            raise PostgresUpgradeError(f"Invalid PostgreSQL version: {current_version}")
+        current_version = int(current_version)
+
+        current_postgresql_config_path = os.path.join('/etc/postgresql/', str(current_version), 'main')
+        if not os.path.isfile(os.path.join(current_postgresql_config_path, 'postgresql.conf')):
+            raise PostgresUpgradeError(f"postgresql.conf does not exist in {current_postgresql_config_path}.")
+
+        return current_postgresql_config_path
+
+    def __init__(self,
+                 version: int,
+                 target_version: bool,
+                 starting_version: bool,
+                 port: int,
+                 extra_postgres_config: str):
+        """
+        Constructor.
+
+        Checks whether various binaries / paths / directories are available.
+
+        :param version: PostgreSQL version number, e.g. 11.
+        :param target_version: If True, this data object represents a version that is being upgraded *to*.
+        :param starting_version: If True, this data object represents a source version, i.e. the initial version that is
+        being upgraded from.
+        :param port: PostgreSQL temporary port number, e.g. 50432.
+        :param extra_postgres_config: Extra lines to add to temporary postgresql.conf.
+        """
+        assert isinstance(version, int), "Version number must be integer."
+        self.version = version
+        assert isinstance(port, int), "Port must be an integer."
+        self.port = port
+
+        self.data_dir = os.path.join(POSTGRES_DATA_DIR, str(version))
+        if target_version:
+            if os.path.exists(self.data_dir):
+                raise PostgresUpgradeError((
+                    f"New data directory {self.data_dir} already exists; if the previous attempt to upgrade failed, "
+                    "run something like this:\n\n"
+                    f"    rm -rf {self.data_dir}\n"
+                    "\n\n"
+                    "on a container, or adjust the path on the host, or revert to old ZFS snapshot."
+                ))
+        else:
+            if starting_version:
+                if not _dir_exists_and_accessible(self.data_dir):
+                    raise PostgresUpgradeError((
+                        f"Old data directory {self.data_dir} does not exist or is inaccessible; forgot to mount it?"
+                    ))
+
+        self.main_dir = os.path.join(self.data_dir, "main")
+        if not target_version:
+            if starting_version:
+                if not _dir_exists_and_accessible(self.main_dir):
+                    raise PostgresUpgradeError(f"Old main directory {self.main_dir} does not exist or is inaccessible.")
+
+                pg_version_path = os.path.join(self.main_dir, 'PG_VERSION')
+                if not os.path.isfile(pg_version_path):
+                    raise PostgresUpgradeError(f"{pg_version_path} does not exist or is inaccessible.")
+
+                postmaster_pid_path = os.path.join(self.main_dir, 'postmaster.pid')
+                if os.path.exists(postmaster_pid_path):
+                    raise PostgresUpgradeError(f"{postmaster_pid_path} exists; is the database running?")
+
+        # Create run directory
+        pathlib.Path(f"/var/run/postgresql/{version}-main.pg_stat_tmp/").mkdir(parents=True, exist_ok=True)
+
+        self.bin_dir = f"/usr/lib/postgresql/{version}/bin/"
+
+        if not _dir_exists_and_accessible(self.bin_dir):
+            raise PostgresUpgradeError(f"Binaries directory {self.bin_dir} does not exist or is inaccessible.")
+
+        self.postgres = os.path.join(self.bin_dir, 'postgres')
+        if not os.access(self.postgres, os.X_OK):
+            raise PostgresUpgradeError(f"'postgres' at {self.postgres} does not exist.")
+
+        if target_version:
+
+            self.initdb = os.path.join(self.bin_dir, 'initdb')
+            if not os.access(self.initdb, os.X_OK):
+                raise PostgresUpgradeError(f"'initdb' at {self.initdb} does not exist.")
+
+            self.pg_upgrade = os.path.join(self.bin_dir, 'pg_upgrade')
+            if not os.access(self.pg_upgrade, os.X_OK):
+                raise PostgresUpgradeError(f"'pg_upgrade' at {self.pg_upgrade} does not exist.")
+
+            self.vacuumdb = os.path.join(self.bin_dir, 'vacuumdb')
+            if not os.access(self.vacuumdb, os.X_OK):
+                raise PostgresUpgradeError(f"'vacuumdb' at {self.vacuumdb} does not exist.")
+
+        logging.info(f"Creating temporary configuration for version {version}...")
+        self.tmp_conf_dir = f"/var/tmp/postgresql/conf/{version}"
+        if os.path.exists(self.tmp_conf_dir):
+            shutil.rmtree(self.tmp_conf_dir)
+        current_postgresql_config_path = self._current_postgresql_config_path()
+        shutil.copytree(current_postgresql_config_path, self.tmp_conf_dir)
+
+        with open(os.path.join(self.tmp_conf_dir, 'postgresql.conf'), 'a') as postgresql_conf:
+            postgresql_conf.write(f"""
+
+            port = {port}
+            data_directory = '/var/lib/postgresql/{version}/main'
+            hba_file = '{self.tmp_conf_dir}/pg_hba.conf'
+            ident_file = '{self.tmp_conf_dir}/pg_ident.conf'
+            external_pid_file = '/var/run/postgresql/{version}-main.pid'
+            cluster_name = '{version}/main'
+            stats_temp_directory = '/var/run/postgresql/{version}-main.pg_stat_tmp'
+
+            {extra_postgres_config}
+
+            """)
+
+
+@dataclasses.dataclass
+class _PostgresVersionPair(object):
+    """
+    Version pair to upgrade between.
+
+    Must be different by exactly one version number, e.g. 11 and 12.
+    """
+    old_version: _PostgresVersion  # version that is being upgraded from
+    new_version: _PostgresVersion  # version that is being upgraded to
+
+
+class _PostgreSQLServer(object):
+    """PostgreSQL server helper which starts / stops a single "postgres" process."""
+
+    __slots__ = ['__port', '__bin_dir', '__data_dir', '__conf_dir', '__proc']
+
+    def __init__(self, port: int, bin_dir: str, data_dir: str, conf_dir: str):
+        """
+        Validate and store the arguments; doesn't start the server.
+        :param port: Port to probe with "pg_isready" while waiting for the server to come up.
+        :param bin_dir: Directory with "postgres" and "pg_isready" binaries.
+        :param data_dir: Data directory (passed to "postgres" as -D).
+        :param conf_dir: Directory with postgresql.conf (passed to "postgres" as config_file).
+        """
+        assert isinstance(port, int), "Port must be an integer."
+        assert os.path.isdir(bin_dir), f"{bin_dir} does not exist."
+        assert os.access(os.path.join(bin_dir, 'postgres'), os.X_OK), f"'postgres' does not exist in {bin_dir}."
+        assert os.access(os.path.join(bin_dir, 'pg_isready'), os.X_OK), f"'pg_isready' does not exist in {bin_dir}."
+        assert os.path.isdir(data_dir), f"{data_dir} does not exist."
+        assert os.path.isdir(conf_dir), f"{conf_dir} does not exist."
+        assert os.path.isfile(
+            os.path.join(conf_dir, 'postgresql.conf')
+        ), f"postgresql.conf in {conf_dir} does not exist."
+
+        self.__bin_dir = bin_dir
+        self.__port = port
+        self.__data_dir = data_dir
+        self.__conf_dir = conf_dir
+
+        self.__proc = None
+
+    def start(self) -> None:
+        """Start "postgres" and block until "pg_isready" succeeds; raise if the process exits before that."""
+        assert not self.__proc, "PostgreSQL is already started."
+        logging.info("Starting PostgreSQL...")
+        self.__proc = subprocess.Popen([
+            os.path.join(self.__bin_dir, 'postgres'),
+            '-D', self.__data_dir,
+            '-c', f'config_file={self.__conf_dir}/postgresql.conf',
+        ])
+
+        # Waiting for port is not enough as PostgreSQL might be recovering; give up if the server process is gone
+        while True:
+            try:
+                subprocess.check_call([os.path.join(self.__bin_dir, 'pg_isready'), '--port', str(self.__port)])
+                break
+            except subprocess.CalledProcessError as ex:
+                if self.__proc.poll() is not None:
+                    raise PostgresUpgradeError(f"PostgreSQL exited with code {self.__proc.returncode}.") from ex
+                logging.debug(f"pg_isready failed: {ex}")
+                logging.info("Waiting for PostgreSQL to come up...")
+                time.sleep(1)
+
+        logging.info("PostgreSQL is up!")
+
+    def stop(self) -> None:
+        """Send SIGTERM to the server process and wait for it to exit."""
+        assert self.__proc, "PostgreSQL has not been started."
+        logging.info("Waiting for PostgreSQL to shut down...")
+        self.__proc.send_signal(signal.SIGTERM)
+        self.__proc.wait()
+        logging.info("PostgreSQL has been shut down")
+
+        self.__proc = None
+
+
+def postgres_upgrade(source_version: int, target_version: int) -> None:
+    """
+    Upgrade PostgreSQL from source version up to target version, one major version at a time ("pg_upgrade --link").
+
+    :param source_version: Source dataset version, e.g. 11.
+    :param target_version: Target dataset version, e.g. 13.
+    :raises PostgresUpgradeError: If the environment is unsuitable (wrong user, missing directories, small /dev/shm)
+        or if target version is not newer than the source version.
+    :raises subprocess.CalledProcessError: If initdb, pg_upgrade, vacuumdb or a helper script exits with non-zero code.
+    """
+    logging.debug(f"Source version: {source_version}; target version: {target_version}")
+
+    # Unset environment variables from parent image so that pg_upgrade can make its
+    # own decisions about which credentials to use (pop() tolerates variables that aren't set)
+    for env_name in ('PGHOST', 'PGPORT', 'PGUSER', 'PGPASSWORD', 'PGDATABASE'):
+        os.environ.pop(env_name, None)
+
+    if not _dir_exists_and_accessible(POSTGRES_DATA_DIR):
+        raise PostgresUpgradeError(f"{POSTGRES_DATA_DIR} does not exist or is inaccessible.")
+
+    if getpass.getuser() != POSTGRES_USER:
+        raise PostgresUpgradeError(f"This script is to be run as '{POSTGRES_USER}' user.")
+
+    if target_version <= source_version:
+        raise PostgresUpgradeError(
+            f"Target version {target_version} is not newer than source version {source_version}."
+        )
+
+    shm_size = int(shutil.disk_usage("/dev/shm")[0] / 1024 / 1024)
+    min_shm_size = int(_ram_size_mb() / 3) - 1024
+    if shm_size < min_shm_size:
+        raise PostgresUpgradeError(
+            f"Container's /dev/shm should be at least {min_shm_size} MB; try passing --shm-size property."
+        )
+
+    logging.info("Updating memory configuration...")
+    subprocess.check_call(['/opt/mediacloud/bin/update_memory_config.sh'])
+
+    # Remove cruft that might have been left over from last attempt to do the upgrade
+    patterns = [
+        'pg_*.log',
+        'pg_*.custom',
+        'pg_upgrade_dump_globals.sql',
+    ]
+    for pattern in patterns:
+        for file in glob.glob(os.path.join(POSTGRES_DATA_DIR, pattern)):
+            logging.debug(f"Deleting {file}...")
+            os.unlink(file)
+
+    new_maintenance_work_mem = int(_ram_size_mb() / 10)
+    logging.info(f"New maintenance work memory limit: {new_maintenance_work_mem} MB")
+    maintenance_work_mem_statement = f'maintenance_work_mem = {new_maintenance_work_mem}MB'
+
+    # Work out upgrade pairs
+    # (initialize the pairs first so that _PostgresVersion() gets a chance to test environment first)
+    upgrade_pairs = []
+    current_port = 50432
+    for version in range(source_version, target_version):
+        upgrade_pairs.append(
+            _PostgresVersionPair(
+                old_version=_PostgresVersion(
+                    version=version,
+                    target_version=False,
+                    starting_version=(version == source_version),
+                    port=current_port,
+                    extra_postgres_config='',
+                ),
+                new_version=_PostgresVersion(
+                    version=version + 1,
+                    target_version=True,
+                    starting_version=False,
+                    port=current_port + 1,
+                    extra_postgres_config=maintenance_work_mem_statement,
+                )
+            ))
+        current_port = current_port + 2
+
+    initial_version = upgrade_pairs[0].old_version
+    logging.info("Starting PostgreSQL before upgrade in case the last shutdown was unclean...")
+    proc = _PostgreSQLServer(
+        port=initial_version.port,
+        bin_dir=initial_version.bin_dir,
+        data_dir=initial_version.main_dir,
+        conf_dir=initial_version.tmp_conf_dir,
+    )
+    proc.start()
+    proc.stop()
+
+    for pair in upgrade_pairs:
+
+        logging.info(f"Upgrading from {pair.old_version.version} to {pair.new_version.version}...")
+
+        logging.info("Running initdb...")
+        pathlib.Path(pair.new_version.main_dir).mkdir(parents=True, exist_ok=True)
+        subprocess.check_call([
+            pair.new_version.initdb,
+            '--pgdata', pair.new_version.main_dir,
+
+            # At the time of writing we don't use checksums so we can't enable them here; once (if) they get enabled,
+            # this needs to be uncommented
+            # '--data-checksums',
+
+            '--encoding', 'UTF-8',
+            '--lc-collate', 'en_US.UTF-8',
+            '--lc-ctype', 'en_US.UTF-8',
+        ])
+
+        upgrade_command = [
+            pair.new_version.pg_upgrade,
+            '--jobs', str(multiprocessing.cpu_count()),
+            '--old-bindir', pair.old_version.bin_dir,
+            '--new-bindir', pair.new_version.bin_dir,
+            '--old-datadir', pair.old_version.main_dir,
+            '--new-datadir', pair.new_version.main_dir,
+            '--old-port', str(pair.old_version.port),
+            '--new-port', str(pair.new_version.port),
+            '--old-options', f" -c config_file={pair.old_version.tmp_conf_dir}/postgresql.conf",
+            '--new-options', f" -c config_file={pair.new_version.tmp_conf_dir}/postgresql.conf",
+            '--link',
+            '--verbose',
+        ]
+
+        logging.info("Testing if clusters are compatible...")
+        subprocess.check_call(upgrade_command + ['--check'], cwd=POSTGRES_DATA_DIR)
+
+        logging.info("Upgrading...")
+        subprocess.check_call(upgrade_command, cwd=POSTGRES_DATA_DIR)
+
+        logging.info("Cleaning up old data directory...")
+        shutil.rmtree(pair.old_version.data_dir)
+
+        logging.info("Cleaning up scripts...")
+        for script in [
+            'analyze_new_cluster.sh',
+            'delete_old_cluster.sh',
+            'pg_upgrade_internal.log',
+            'pg_upgrade_server.log',
+            'pg_upgrade_utility.log',
+        ]:
+            script_path = os.path.join(POSTGRES_DATA_DIR, script)
+            if os.path.isfile(script_path):
+                os.unlink(script_path)
+
+        logging.info(f"Done upgrading from {pair.old_version.version} to {pair.new_version.version}")
+
+    current_version = upgrade_pairs[-1].new_version
+
+    proc = _PostgreSQLServer(
+        port=current_version.port,
+        bin_dir=current_version.bin_dir,
+        data_dir=current_version.main_dir,
+        conf_dir=current_version.tmp_conf_dir,
+    )
+    proc.start()
+
+    logging.info("Running VACUUM ANALYZE...")
+    logging.info("(monitor locks while running that because PostgreSQL might decide to do autovacuum!)")
+
+    # FIXME temporarily disable autovacuum in the temp. config
+
+    subprocess.check_call([
+        current_version.vacuumdb,
+        '--port', str(current_version.port),
+        '--all',
+        '--verbose',
+        # Do --analyze-only instead of --analyze-in-stages because we're ready to wait for the full statistics
+        '--analyze-only',
+        '--jobs', str(multiprocessing.cpu_count()),
+    ])
+
+    proc.stop()
+
+    logging.info("Done!")
+
+
+def main():
+    """Parse --source_version / --target_version from the command line and run the upgrade."""
+    parser = argparse.ArgumentParser(description="Upgrade PostgreSQL dataset.")
+    parser.add_argument("-s", "--source_version", type=int, required=True,
+                        help="Version to upgrade from")
+    parser.add_argument("-t", "--target_version", type=int, required=True,
+                        help="Version to upgrade to")
+    args = parser.parse_args()
+    postgres_upgrade(source_version=args.source_version, target_version=args.target_version)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/apps/purge-object-caches/.idea/mediawords.sql b/apps/purge-object-caches/.idea/mediawords.sql
new file mode 120000
index 0000000000..08fc9a64b9
--- /dev/null
+++ b/apps/purge-object-caches/.idea/mediawords.sql
@@ -0,0 +1 @@
+../../postgresql-server/schema/mediawords.sql
\ No newline at end of file
diff --git a/apps/purge-object-caches/.idea/misc.xml b/apps/purge-object-caches/.idea/misc.xml
index 4c12eeeb9d..0240bc7d67 100644
--- a/apps/purge-object-caches/.idea/misc.xml
+++ b/apps/purge-object-caches/.idea/misc.xml
@@ -6,5 +6,5 @@
   <component name="NodePackageJsonFileManager">
     <packageJsonPaths />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.7.3 Docker Compose (purge-object-caches at [/Users/pypt/Dropbox/etc-MediaCloud/trunk/apps/purge-object-caches/docker-compose.tests.yml])" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.8.5 Docker Compose (purge-object-caches at [/home/pypt/m/apps/purge-object-caches/docker-compose.tests.yml])" project-jdk-type="Python SDK" />
 </project>
\ No newline at end of file
diff --git a/apps/purge-object-caches/.idea/purge-object-caches.iml b/apps/purge-object-caches/.idea/purge-object-caches.iml
index ec5b1a0497..54087d86da 100644
--- a/apps/purge-object-caches/.idea/purge-object-caches.iml
+++ b/apps/purge-object-caches/.idea/purge-object-caches.iml
@@ -2,7 +2,7 @@
 <module type="PYTHON_MODULE" version="4">
   <component name="NewModuleRootManager">
     <content url="file://$MODULE_DIR$" />
-    <orderEntry type="jdk" jdkName="Remote Python 3.7.3 Docker Compose (purge-object-caches at [/Users/pypt/Dropbox/etc-MediaCloud/trunk/apps/purge-object-caches/docker-compose.tests.yml])" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Remote Python 3.8.5 Docker Compose (purge-object-caches at [/home/pypt/m/apps/purge-object-caches/docker-compose.tests.yml])" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PackageRequirementsSettings">
diff --git a/apps/purge-object-caches/.idea/sqlDataSources.xml b/apps/purge-object-caches/.idea/sqlDataSources.xml
new file mode 100644
index 0000000000..d9d9e21161
--- /dev/null
+++ b/apps/purge-object-caches/.idea/sqlDataSources.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="SqlDataSourceStorage">
+    <option name="dataSources">
+      <list>
+        <State>
+          <option name="id" value="4c26fc16-1dc4-49a5-8b88-1556d208cbac" />
+          <option name="name" value="DDL data source" />
+          <option name="urls">
+            <array>
+              <option value="file://$PROJECT_DIR$/.idea/mediawords.sql" />
+            </array>
+          </option>
+        </State>
+      </list>
+    </option>
+  </component>
+</project>
\ No newline at end of file
diff --git a/apps/purge-object-caches/docker-compose.tests.yml b/apps/purge-object-caches/docker-compose.tests.yml
index a19f62821f..1420a62a1c 100644
--- a/apps/purge-object-caches/docker-compose.tests.yml
+++ b/apps/purge-object-caches/docker-compose.tests.yml
@@ -43,5 +43,5 @@ services:
               source: ./../postgresql-server/schema/
               target: /opt/mediacloud/schema/
             - type: bind
-              source: ./../postgresql-server/conf/
-              target: /etc/postgresql/11/main/
+              source: ./../postgresql-base/conf/
+              target: /etc/postgresql/13/main/
diff --git a/apps/rescrape-media/.idea/mediawords.sql b/apps/rescrape-media/.idea/mediawords.sql
new file mode 120000
index 0000000000..08fc9a64b9
--- /dev/null
+++ b/apps/rescrape-media/.idea/mediawords.sql
@@ -0,0 +1 @@
+../../postgresql-server/schema/mediawords.sql
\ No newline at end of file
diff --git a/apps/rescrape-media/.idea/misc.xml b/apps/rescrape-media/.idea/misc.xml
index 9f7e834cda..d0b1e15d09 100644
--- a/apps/rescrape-media/.idea/misc.xml
+++ b/apps/rescrape-media/.idea/misc.xml
@@ -6,5 +6,5 @@
   <component name="NodePackageJsonFileManager">
     <packageJsonPaths />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.7.3 Docker Compose (rescrape-media at [/Users/pypt/Dropbox/etc-MediaCloud/trunk/apps/rescrape-media/docker-compose.tests.yml])" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.8.5 Docker Compose (rescrape-media at [/home/pypt/m/apps/rescrape-media/docker-compose.tests.yml])" project-jdk-type="Python SDK" />
 </project>
\ No newline at end of file
diff --git a/apps/rescrape-media/.idea/rescrape-media.iml b/apps/rescrape-media/.idea/rescrape-media.iml
index a23aa11380..bba1087a73 100644
--- a/apps/rescrape-media/.idea/rescrape-media.iml
+++ b/apps/rescrape-media/.idea/rescrape-media.iml
@@ -2,7 +2,7 @@
 <module type="PYTHON_MODULE" version="4">
   <component name="NewModuleRootManager">
     <content url="file://$MODULE_DIR$" />
-    <orderEntry type="jdk" jdkName="Remote Python 3.7.3 Docker Compose (rescrape-media at [/Users/pypt/Dropbox/etc-MediaCloud/trunk/apps/rescrape-media/docker-compose.tests.yml])" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Remote Python 3.8.5 Docker Compose (rescrape-media at [/home/pypt/m/apps/rescrape-media/docker-compose.tests.yml])" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="TestRunnerService">
diff --git a/apps/rescrape-media/.idea/sqlDataSources.xml b/apps/rescrape-media/.idea/sqlDataSources.xml
new file mode 100644
index 0000000000..80ebb2caa7
--- /dev/null
+++ b/apps/rescrape-media/.idea/sqlDataSources.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="SqlDataSourceStorage">
+    <option name="dataSources">
+      <list>
+        <State>
+          <option name="id" value="43e8f2b1-8771-4162-9ab5-dae72ad1ddba" />
+          <option name="name" value="DDL data source" />
+          <option name="urls">
+            <array>
+              <option value="file://$PROJECT_DIR$/.idea/mediawords.sql" />
+            </array>
+          </option>
+        </State>
+      </list>
+    </option>
+  </component>
+</project>
\ No newline at end of file
diff --git a/apps/rescrape-media/docker-compose.tests.yml b/apps/rescrape-media/docker-compose.tests.yml
index 00af15d826..42076ee535 100644
--- a/apps/rescrape-media/docker-compose.tests.yml
+++ b/apps/rescrape-media/docker-compose.tests.yml
@@ -50,8 +50,8 @@ services:
               source: ./../postgresql-server/schema/
               target: /opt/mediacloud/schema/
             - type: bind
-              source: ./../postgresql-server/conf/
-              target: /etc/postgresql/11/main/
+              source: ./../postgresql-base/conf/
+              target: /etc/postgresql/13/main/
 
     rabbitmq-server:
         image: gcr.io/mcback/rabbitmq-server:latest
diff --git a/apps/sitemap-fetch-media-pages/.idea/mediawords.sql b/apps/sitemap-fetch-media-pages/.idea/mediawords.sql
new file mode 120000
index 0000000000..08fc9a64b9
--- /dev/null
+++ b/apps/sitemap-fetch-media-pages/.idea/mediawords.sql
@@ -0,0 +1 @@
+../../postgresql-server/schema/mediawords.sql
\ No newline at end of file
diff --git a/apps/sitemap-fetch-media-pages/.idea/sqlDataSources.xml b/apps/sitemap-fetch-media-pages/.idea/sqlDataSources.xml
new file mode 100644
index 0000000000..d0fee50a0b
--- /dev/null
+++ b/apps/sitemap-fetch-media-pages/.idea/sqlDataSources.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="SqlDataSourceStorage">
+    <option name="dataSources">
+      <list>
+        <State>
+          <option name="id" value="2d3b312c-b416-476c-93e4-1b8bdebf4671" />
+          <option name="name" value="DDL data source" />
+          <option name="urls">
+            <array>
+              <option value="file://$PROJECT_DIR$/.idea/mediawords.sql" />
+            </array>
+          </option>
+        </State>
+      </list>
+    </option>
+  </component>
+</project>
\ No newline at end of file
diff --git a/apps/solr-base/Dockerfile b/apps/solr-base/Dockerfile
index 9f58041061..0ff5015f9c 100644
--- a/apps/solr-base/Dockerfile
+++ b/apps/solr-base/Dockerfile
@@ -11,7 +11,7 @@ ENV MEDIACLOUD_SOLR_VERSION="6.5.0"
 # (distribution needed for running both Solr itself and ZooKeeper)
 RUN \
     mkdir -p /opt/solr/ && \
-    /dl_to_stdout.sh "https://archive.apache.org/dist/lucene/solr/${MEDIACLOUD_SOLR_VERSION}/solr-${MEDIACLOUD_SOLR_VERSION}.tgz" | \
+    /dl_to_stdout.sh "https://mediacloud-archive-apache-org.s3.amazonaws.com/solr-${MEDIACLOUD_SOLR_VERSION}.tgz" | \
 	    tar -zx -C /opt/solr/ --strip 1 && \
 	true
 
diff --git a/apps/solr-zookeeper/Dockerfile b/apps/solr-zookeeper/Dockerfile
index e278b78d05..ac70852292 100644
--- a/apps/solr-zookeeper/Dockerfile
+++ b/apps/solr-zookeeper/Dockerfile
@@ -12,7 +12,7 @@ ENV MEDIACLOUD_ZOOKEEPER_VERSION="3.4.10"
 # Download and extract ZooKeeper
 RUN \
     mkdir -p /opt/zookeeper/ && \
-    /dl_to_stdout.sh "https://archive.apache.org/dist/zookeeper/zookeeper-${MEDIACLOUD_ZOOKEEPER_VERSION}/zookeeper-${MEDIACLOUD_ZOOKEEPER_VERSION}.tar.gz" | \
+    /dl_to_stdout.sh "https://mediacloud-archive-apache-org.s3.amazonaws.com/zookeeper-${MEDIACLOUD_ZOOKEEPER_VERSION}.tar.gz" | \
         tar -zx -C /opt/zookeeper/ --strip 1 && \
     rm -rf /opt/zookeeper/conf/ && \
     true
diff --git a/apps/temporal-elasticsearch/.dockerignore b/apps/temporal-elasticsearch/.dockerignore
new file mode 100644
index 0000000000..9b2c362a80
--- /dev/null
+++ b/apps/temporal-elasticsearch/.dockerignore
@@ -0,0 +1,92 @@
+#
+# Files from the build context to be ignored by "docker build".
+#
+# You might want to add as many of constantly changing files here as possible
+# to prevent container's image from getting rebuilt every full moon.
+#
+# Unfortunately, we can't just symlink this file to every app's directory:
+#
+#     https://github.com/moby/moby/issues/12886
+#
+# so for the time being you have to manually copy this file to every app
+# subdirectory:
+#
+#     cd apps/
+#     find . -maxdepth 1 -type d \( ! -name . \) -exec bash -c "cd '{}' && cp ../dockerignore.dist ./.dockerignore" \;
+#
+
+*$py.class
+*.cover
+*.DS_Store
+*.egg
+*.egg-info/
+*.log
+*.manifest
+*.mo
+*.pot
+*.py[cod]
+*.sage.py
+*.so
+*.spec
+*.swp
+*/*.py[cod]
+*/*.swp
+*/*/*.py[cod]
+*/*/*.swp
+*/*/*/*.py[cod]
+*/*/*/*.swp
+*/*/*/__pycache__/
+*/*/__pycache__/
+*/__pycache__/
+._*
+.apdisk
+.AppleDB
+.AppleDesktop
+.AppleDouble
+.cache
+.com.apple.timemachine.donotpresent
+.coverage
+.coverage.*
+.dockerignore
+.DocumentRevisions-V100
+.DS_Store
+.eggs
+.env
+.fseventsd
+.git
+.gitignore
+.hypothesis
+.idea
+.installed.cfg
+.ipynb_checkpoints
+.LSOverride
+.mypy_cache
+.pytest_cache
+.Python
+.python-version
+.ropeproject
+.scrapy
+.Spotlight-V100
+.spyderproject
+.spyproject
+.TemporaryItems
+.tox
+.Trashes
+.venv
+.VolumeIcon.icns
+.webassets-cache
+__pycache__
+celerybeat-schedule
+coverage.xml
+Icon
+local_settings.py
+Network Trash Folder
+nosetests.xml
+parts
+pip-delete-this-directory.txt
+pip-log.txt
+sdist
+Temporary Items
+wheels
+_Inline
+
diff --git a/apps/temporal-elasticsearch/Dockerfile b/apps/temporal-elasticsearch/Dockerfile
new file mode 100644
index 0000000000..cb7cd58ca9
--- /dev/null
+++ b/apps/temporal-elasticsearch/Dockerfile
@@ -0,0 +1,35 @@
+#
+# Elasticsearch for Temporal
+#
+
+FROM gcr.io/mcback/elasticsearch-base:latest
+
+USER root
+
+COPY config/* /opt/elasticsearch/config/
+
+# Merge base and Temporal configuration into the final elasticsearch.yml
+RUN \
+    #
+    # Merge base and Temporal configs into one
+    cat \
+        /opt/elasticsearch/config/elasticsearch-base.yml \
+        /opt/elasticsearch/config/temporal-elasticsearch.yml \
+        > /opt/elasticsearch/config/elasticsearch.yml && \
+    #
+    true
+
+USER elasticsearch
+
+# Preload with Temporal index template
+# (https://github.com/temporalio/temporal/blob/v1.9.2/schema/elasticsearch/v7/visibility/index_template.json)
+COPY index_template.json setup_index_template.sh /
+RUN /setup_index_template.sh
+USER root
+RUN rm /index_template.json /setup_index_template.sh
+USER elasticsearch
+
+# Elasticsearch data
+VOLUME /var/lib/elasticsearch
+
+CMD ["/opt/elasticsearch/bin/elasticsearch.sh"]
diff --git a/apps/temporal-elasticsearch/config/.dockerignore b/apps/temporal-elasticsearch/config/.dockerignore
new file mode 100644
index 0000000000..b3c0a37b66
--- /dev/null
+++ b/apps/temporal-elasticsearch/config/.dockerignore
@@ -0,0 +1 @@
+elasticsearch.keystore
diff --git a/apps/temporal-elasticsearch/config/.gitignore b/apps/temporal-elasticsearch/config/.gitignore
new file mode 100644
index 0000000000..3eb03f777e
--- /dev/null
+++ b/apps/temporal-elasticsearch/config/.gitignore
@@ -0,0 +1,3 @@
+# Might get created by a Docker container
+elasticsearch.keystore
+
diff --git a/apps/temporal-elasticsearch/config/temporal-elasticsearch.yml b/apps/temporal-elasticsearch/config/temporal-elasticsearch.yml
new file mode 100644
index 0000000000..e96f46b92d
--- /dev/null
+++ b/apps/temporal-elasticsearch/config/temporal-elasticsearch.yml
@@ -0,0 +1,2 @@
+cluster.name: temporal-elasticsearch
+node.name: temporal-elasticsearch
diff --git a/apps/temporal-elasticsearch/index_template.json b/apps/temporal-elasticsearch/index_template.json
new file mode 100644
index 0000000000..73d18e7d9c
--- /dev/null
+++ b/apps/temporal-elasticsearch/index_template.json
@@ -0,0 +1,81 @@
+{
+  "order": 0,
+  "index_patterns": [
+    "temporal-visibility-*"
+  ],
+  "settings": {
+    "index": {
+      "number_of_shards": "5",
+      "number_of_replicas": "0",
+      "search.idle.after": "365d"
+    }
+  },
+  "mappings": {
+    "dynamic": "false",
+    "properties": {
+      "NamespaceId": {
+        "type": "keyword"
+      },
+      "WorkflowId": {
+        "type": "keyword"
+      },
+      "RunId": {
+        "type": "keyword"
+      },
+      "WorkflowType": {
+        "type": "keyword"
+      },
+      "StartTime": {
+        "type": "long"
+      },
+      "ExecutionTime": {
+        "type": "long"
+      },
+      "CloseTime": {
+        "type": "long"
+      },
+      "ExecutionStatus": {
+        "type": "long"
+      },
+      "TaskQueue": {
+        "type": "keyword"
+      },
+
+      "Attr": {
+        "properties": {
+          "TemporalChangeVersion": {
+            "type": "keyword"
+          },
+          "CustomStringField": {
+            "type": "text"
+          },
+          "CustomKeywordField": {
+            "type": "keyword"
+          },
+          "CustomIntField": {
+            "type": "long"
+          },
+          "CustomDoubleField": {
+            "type": "double"
+          },
+          "CustomBoolField": {
+            "type": "boolean"
+          },
+          "CustomDatetimeField": {
+            "type": "date"
+          },
+          "CustomNamespace": {
+            "type": "keyword"
+          },
+          "Operator": {
+            "type": "keyword"
+          },
+          "BinaryChecksums": {
+            "type": "keyword"
+          }
+        }
+      }
+    }
+  },
+  "aliases": {}
+}
diff --git a/apps/temporal-elasticsearch/setup_index_template.sh b/apps/temporal-elasticsearch/setup_index_template.sh
new file mode 100755
index 0000000000..ef42765eec
--- /dev/null
+++ b/apps/temporal-elasticsearch/setup_index_template.sh
@@ -0,0 +1,56 @@
+#!/bin/bash
+#
+# Build-time helper: temporarily start Elasticsearch, upload the Temporal
+# visibility index template to it, and stop Elasticsearch again.
+#
+
+set -u
+set -e
+
+# Look for the template next to this script so that the upload doesn't depend
+# on the caller's working directory
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+INDEX_TEMPLATE_PATH="${SCRIPT_DIR%/}/index_template.json"
+
+ES_URL="http://127.0.0.1:9200"
+
+# How many times (one second apart) to poll Elasticsearch before giving up
+ES_START_ATTEMPTS=120
+
+
+echo "Starting Elasticsearch for index setup..."
+/opt/elasticsearch/bin/elasticsearch &
+
+es_started=0
+for (( i = 1; i <= ES_START_ATTEMPTS; i++ )); do
+    echo "Waiting for Elasticsearch to start..."
+    if curl --silent --show-error --fail "${ES_URL}/_cluster/health"; then
+        es_started=1
+        break
+    else
+        sleep 1
+    fi
+done
+
+# Bail out with a clear message instead of falling through to the upload
+if [ "${es_started}" -ne 1 ]; then
+    echo "Elasticsearch didn't start after ${ES_START_ATTEMPTS} attempts." >&2
+    exit 1
+fi
+
+
+echo "Creating Temporal index template..."
+curl -XPUT "${ES_URL}/_template/temporal-visibility-template" \
+    --fail \
+    --silent \
+    --show-error \
+    -H "Content-Type: application/json" \
+    -d "@${INDEX_TEMPLATE_PATH}"
+echo "Done creating Temporal index template."
+
+
+echo "Stopping Elasticsearch..."
+killall java
+while pgrep java > /dev/null; do
+    sleep 0.5
+done
diff --git a/apps/temporal-grafana/.dockerignore b/apps/temporal-grafana/.dockerignore
new file mode 100644
index 0000000000..9b2c362a80
--- /dev/null
+++ b/apps/temporal-grafana/.dockerignore
@@ -0,0 +1,92 @@
+#
+# Files from the build context to be ignored by "docker build".
+#
+# You might want to add as many of constantly changing files here as possible
+# to prevent container's image from getting rebuilt every full moon.
+#
+# Unfortunately, we can't just symlink this file to every app's directory:
+#
+#     https://github.com/moby/moby/issues/12886
+#
+# so for the time being you have to manually copy this file to every app
+# subdirectory:
+#
+#     cd apps/
+#     find . -maxdepth 1 -type d \( ! -name . \) -exec bash -c "cd '{}' && cp ../dockerignore.dist ./.dockerignore" \;
+#
+
+*$py.class
+*.cover
+*.DS_Store
+*.egg
+*.egg-info/
+*.log
+*.manifest
+*.mo
+*.pot
+*.py[cod]
+*.sage.py
+*.so
+*.spec
+*.swp
+*/*.py[cod]
+*/*.swp
+*/*/*.py[cod]
+*/*/*.swp
+*/*/*/*.py[cod]
+*/*/*/*.swp
+*/*/*/__pycache__/
+*/*/__pycache__/
+*/__pycache__/
+._*
+.apdisk
+.AppleDB
+.AppleDesktop
+.AppleDouble
+.cache
+.com.apple.timemachine.donotpresent
+.coverage
+.coverage.*
+.dockerignore
+.DocumentRevisions-V100
+.DS_Store
+.eggs
+.env
+.fseventsd
+.git
+.gitignore
+.hypothesis
+.idea
+.installed.cfg
+.ipynb_checkpoints
+.LSOverride
+.mypy_cache
+.pytest_cache
+.Python
+.python-version
+.ropeproject
+.scrapy
+.Spotlight-V100
+.spyderproject
+.spyproject
+.TemporaryItems
+.tox
+.Trashes
+.venv
+.VolumeIcon.icns
+.webassets-cache
+__pycache__
+celerybeat-schedule
+coverage.xml
+Icon
+local_settings.py
+Network Trash Folder
+nosetests.xml
+parts
+pip-delete-this-directory.txt
+pip-log.txt
+sdist
+Temporary Items
+wheels
+_Inline
+
diff --git a/apps/temporal-grafana/Dockerfile b/apps/temporal-grafana/Dockerfile
new file mode 100644
index 0000000000..9eb066861a
--- /dev/null
+++ b/apps/temporal-grafana/Dockerfile
@@ -0,0 +1,68 @@
+#
+# Grafana for Temporal stats
+#
+
+FROM gcr.io/mcback/base:latest
+
+# Install dependencies
+RUN \
+    apt-get -y --no-install-recommends install \
+        libfontconfig1 \
+    && \
+    true
+
+# Install Grafana
+RUN \
+    mkdir -p /opt/grafana/ && \
+    /dl_to_stdout.sh "https://dl.grafana.com/oss/release/grafana-7.5.5.linux-amd64.tar.gz" | \
+        tar -zx -C /opt/grafana/ --strip 1 && \
+    true
+
+RUN \
+    #
+    # Remove sample provisioning
+    rm -rf /opt/grafana/conf/provisioning/ && \
+    #
+    # Add unprivileged user the service will run as, and create its data
+    # directories; chown recursively so "logs/" and "plugins/" are writable too
+    useradd -ms /bin/bash temporal && \
+    mkdir -p \
+        /var/lib/grafana/logs/ \
+        /var/lib/grafana/plugins/ \
+    && \
+    chown -R temporal:temporal /var/lib/grafana/ && \
+    #
+    # Create directory for provisioning dashboards
+    mkdir -p /opt/grafana/dashboards/ && \
+    #
+    true
+
+COPY provisioning/ /opt/grafana/conf/provisioning/
+COPY dashboards/dashboards/* /opt/grafana/dashboards/
+
+# Test if submodules were checked out
+RUN \
+    if [ ! -f "/opt/grafana/dashboards/temporal.json" ]; then \
+        echo && \
+        echo "Git submodules haven't been checked out, please run:" && \
+        echo && \
+        echo "    git submodule update --init --recursive" && \
+        echo && \
+        echo "and then rebuild this image." && \
+        echo && \
+        exit 1; \
+    fi
+
+WORKDIR /opt/grafana/
+
+ENV PATH="/opt/grafana/bin:${PATH}"
+
+EXPOSE 3000
+
+VOLUME /var/lib/grafana/
+
+USER temporal
+
+COPY grafana.ini /opt/grafana/conf/
+
+CMD ["grafana-server", "-config", "/opt/grafana/conf/grafana.ini"]
diff --git a/apps/temporal-grafana/dashboards b/apps/temporal-grafana/dashboards
new file mode 160000
index 0000000000..6094dd666f
--- /dev/null
+++ b/apps/temporal-grafana/dashboards
@@ -0,0 +1 @@
+Subproject commit 6094dd666f386e76a3c03e0049f02521210b6883
diff --git a/apps/temporal-grafana/grafana.ini b/apps/temporal-grafana/grafana.ini
new file mode 100644
index 0000000000..9b9d4ca5c8
--- /dev/null
+++ b/apps/temporal-grafana/grafana.ini
@@ -0,0 +1,122 @@
+# possible values : production, development
+app_mode = production
+
+# instance name, defaults to HOSTNAME environment variable value or hostname if HOSTNAME var is empty
+instance_name = temporal-grafana
+
+#################################### Paths ####################################
+[paths]
+# Path to where grafana can store temp files, sessions, and the sqlite3 db (if that is used)
+data = /var/lib/grafana
+
+# Directory where grafana can store logs
+logs = /var/lib/grafana/logs
+
+# Directory where grafana will automatically scan and look for plugins
+plugins = /var/lib/grafana/plugins
+
+#################################### Server ####################################
+[server]
+
+# The HTTP port to use
+http_port = 3000
+
+#################################### Analytics ####################################
+[analytics]
+# Server reporting, sends usage counters to stats.grafana.org every 24 hours.
+# No ip addresses are being tracked, only simple counters to track
+# running instances, dashboard and error counts. It is very helpful to us.
+# Change this option to false to disable reporting.
+reporting_enabled = false
+
+# Set to false to disable all checks to https://grafana.net
+# for new versions (grafana itself and plugins), check is used
+# in some UI views to notify that grafana or plugin update exists
+# This option does not cause any auto updates, nor send any information
+# only a GET request to http://grafana.com to get latest versions
+check_for_updates = false
+
+#################################### Security ####################################
+[security]
+# disable creation of admin user on first start of grafana
+disable_initial_admin_creation = false
+
+# default admin user, created on startup
+admin_user = mediacloud
+
+# default admin password, can be changed before first start of grafana,  or in profile settings
+admin_password = mediacloud
+
+# used for signing
+# (Media Cloud's Grafana is hosted behind a firewall so this can be anything really)
+secret_key = wkKjdjnUL9j27QW4L2w5
+
+# disable gravatar profile images
+disable_gravatar = true
+
+# disable protection against brute force login attempts
+disable_brute_force_login_protection = true
+
+#################################### Snapshots ###########################
+[snapshots]
+# snapshot sharing options
+external_enabled = false
+
+#################################### Dashboards History ##################
+[dashboards]
+
+# Path to the default home dashboard. If this value is empty, then Grafana uses StaticRootPath + "dashboards/home.json"
+default_home_dashboard_path = dashboards/temporal.json
+
+#################################### Users ###############################
+[users]
+# disable user signup / registration
+allow_sign_up = false
+
+# Allow non admin users to create organizations
+allow_org_create = false
+
+# Background text for the user field on the login page
+login_hint = mediacloud
+password_hint = mediacloud
+
+# Default UI theme ("dark" or "light")
+default_theme = light
+
+[auth]
+
+# Set to true to disable the signout link in the side menu. useful if you use auth.proxy, defaults to false
+disable_signout_menu = true
+
+#################################### Anonymous Auth ######################
+[auth.anonymous]
+# enable anonymous access
+enabled = false
+
+#################################### Logging ##########################
+[log]
+# Either "console", "file", "syslog". Default is console and file
+# Use space to separate multiple modes, e.g. "console file"
+mode = console
+
+format = text
+
+#################################### Alerting ############################
+[alerting]
+# Disable alerting engine & UI features
+enabled = false
+
+#################################### Annotations #########################
+[annotations]
+# Configures the batch size for the annotation clean-up job. This setting is used for dashboard, API, and alert annotations.
+;cleanupjob_batchsize = 100
+
+#################################### Explore #############################
+[explore]
+# Enable the Explore section
+enabled = true
+
+[date_formats]
+
+# Default timezone for user preferences. Options are 'browser' for the browser local timezone or a timezone name from IANA Time Zone database, e.g. 'UTC' or 'Europe/Amsterdam' etc.
+default_timezone = 'America/New_York'
diff --git a/apps/temporal-grafana/provisioning/dashboards/temporal.yml b/apps/temporal-grafana/provisioning/dashboards/temporal.yml
new file mode 100644
index 0000000000..5fbf68b99b
--- /dev/null
+++ b/apps/temporal-grafana/provisioning/dashboards/temporal.yml
@@ -0,0 +1,10 @@
+apiVersion: 1
+providers:
+  - name: 'default'
+    orgId: 1
+    folder: ''
+    type: file
+    disableDeletion: true
+    editable: false
+    options:
+      path: /opt/grafana/dashboards/
diff --git a/apps/temporal-grafana/provisioning/datasources/temporal.yml b/apps/temporal-grafana/provisioning/datasources/temporal.yml
new file mode 100644
index 0000000000..9722c5904c
--- /dev/null
+++ b/apps/temporal-grafana/provisioning/datasources/temporal.yml
@@ -0,0 +1,7 @@
+apiVersion: 1
+datasources:
+  - name: TemporalMetrics
+    type: prometheus
+    url: http://temporal-prometheus:9090
+    access: proxy
+    isDefault: true
diff --git a/apps/podcast-poll-due-operations/tests/python/__init__.py b/apps/temporal-grafana/provisioning/notifiers/.empty_dir
similarity index 100%
rename from apps/podcast-poll-due-operations/tests/python/__init__.py
rename to apps/temporal-grafana/provisioning/notifiers/.empty_dir
diff --git a/apps/podcast-submit-operation/src/python/podcast_submit_operation/__init__.py b/apps/temporal-grafana/provisioning/plugins/.empty_dir
similarity index 100%
rename from apps/podcast-submit-operation/src/python/podcast_submit_operation/__init__.py
rename to apps/temporal-grafana/provisioning/plugins/.empty_dir
diff --git a/apps/temporal-postgresql/.dockerignore b/apps/temporal-postgresql/.dockerignore
new file mode 100644
index 0000000000..9b2c362a80
--- /dev/null
+++ b/apps/temporal-postgresql/.dockerignore
@@ -0,0 +1,92 @@
+#
+# Files from the build context to be ignored by "docker build".
+#
+# You might want to add as many of constantly changing files here as possible
+# to prevent container's image from getting rebuilt every full moon.
+#
+# Unfortunately, we can't just symlink this file to every app's directory:
+#
+#     https://github.com/moby/moby/issues/12886
+#
+# so for the time being you have to manually copy this file to every app
+# subdirectory:
+#
+#     cd apps/
+#     find . -maxdepth 1 -type d \( ! -name . \) -exec bash -c "cd '{}' && cp ../dockerignore.dist ./.dockerignore" \;
+#
+
+*$py.class
+*.cover
+*.DS_Store
+*.egg
+*.egg-info/
+*.log
+*.manifest
+*.mo
+*.pot
+*.py[cod]
+*.sage.py
+*.so
+*.spec
+*.swp
+*/*.py[cod]
+*/*.swp
+*/*/*.py[cod]
+*/*/*.swp
+*/*/*/*.py[cod]
+*/*/*/*.swp
+*/*/*/__pycache__/
+*/*/__pycache__/
+*/__pycache__/
+._*
+.apdisk
+.AppleDB
+.AppleDesktop
+.AppleDouble
+.cache
+.com.apple.timemachine.donotpresent
+.coverage
+.coverage.*
+.dockerignore
+.DocumentRevisions-V100
+.DS_Store
+.eggs
+.env
+.fseventsd
+.git
+.gitignore
+.hypothesis
+.idea
+.installed.cfg
+.ipynb_checkpoints
+.LSOverride
+.mypy_cache
+.pytest_cache
+.Python
+.python-version
+.ropeproject
+.scrapy
+.Spotlight-V100
+.spyderproject
+.spyproject
+.TemporaryItems
+.tox
+.Trashes
+.venv
+.VolumeIcon.icns
+.webassets-cache
+__pycache__
+celerybeat-schedule
+coverage.xml
+Icon
+local_settings.py
+Network Trash Folder
+nosetests.xml
+parts
+pip-delete-this-directory.txt
+pip-log.txt
+sdist
+Temporary Items
+wheels
+_Inline
+
diff --git a/apps/temporal-postgresql/Dockerfile b/apps/temporal-postgresql/Dockerfile
new file mode 100644
index 0000000000..3e4cad27dd
--- /dev/null
+++ b/apps/temporal-postgresql/Dockerfile
@@ -0,0 +1,98 @@
+#
+# PostgreSQL server for Temporal's workflow storage
+#
+
+FROM gcr.io/mcback/postgresql-base:latest
+
+USER root
+
+RUN \
+    mkdir -p \
+        /opt/temporal-postgresql/bin/ \
+        /opt/temporal-postgresql/schema/ \
+    && \
+    #
+    # Install Temporal binaries (tctl, temporal-server, temporal-sql-tool)
+    # Keep version that's being used in sync with temporal-server
+    mkdir -p /var/tmp/temporal/ && \
+    /dl_to_stdout.sh "https://github.com/temporalio/temporal/releases/download/v1.9.2/temporal_1.9.2_linux_amd64.tar.gz" | \
+        tar -zx -C /var/tmp/temporal/ && \
+    mv \
+        # Needed for creating the default namespace
+        /var/tmp/temporal/tctl \
+        # Needed for temporarily starting the server at build time to create
+        # the default namespace
+        /var/tmp/temporal/temporal-server \
+        # Needed for initializing default schema
+        /var/tmp/temporal/temporal-sql-tool \
+        #
+        /usr/bin/ && \
+    rm -rf /var/tmp/temporal/ && \
+    true
+
+# Check out schema
+RUN \
+    apt-get -y --no-install-recommends install git && \
+    mkdir -p /var/tmp/temporal/ && \
+    cd /var/tmp/temporal/ && \
+    git init && \
+    git remote add origin https://github.com/temporalio/temporal.git && \
+    # HEAD of "v1.9.2" tag:
+    git fetch --depth 1 origin d3acf160e51deb60ac798746fc06fc5c46c46269 && \
+    git checkout FETCH_HEAD && \
+    mv schema/postgresql/* /opt/temporal-postgresql/schema/ && \
+    cd / && \
+    rm -rf /var/tmp/temporal/ && \
+    apt-get -y remove git && \
+    apt-get -y autoremove && \
+    apt-get -y clean && \
+    true
+
+# Install envsubst for generating configuration
+RUN apt-get -y --no-install-recommends install gettext-base
+
+RUN mkdir -p /opt/temporal-server/config/
+COPY temporal-config/* /opt/temporal-server/config/
+
+# Allow a final mediacloud.yml to get generated
+RUN chown postgres:postgres /opt/temporal-server/config/
+
+# Copy helper scripts
+COPY bin/* /opt/temporal-postgresql/bin/
+
+USER postgres
+
+# Initialize data volume, create users, a database, and initialize it with
+# schema
+# If a new empty volume gets mounted to /var/lib/postgresql/ upon
+# container start, Docker will copy the files from the container to the volume
+RUN /opt/temporal-postgresql/bin/initialize_schema.sh
+
+# Remove the init script, Temporal server and configuration so that someone
+# doesn't accidentally run it in production
+USER root
+RUN \
+    rm -rf \
+        /opt/temporal-postgresql/bin/initialize_schema.sh \
+        /usr/bin/tctl \
+        /usr/bin/temporal-server \
+        /opt/temporal-server/ \
+    && \
+    true
+USER postgres
+
+ENV \
+    PATH="/opt/temporal-postgresql/bin:${PATH}" \
+    #
+    # Make sure that we can connect via "psql" without sudoing into "postgres" user
+    PGHOST=localhost \
+    PGPORT=5432 \
+    PGUSER=temporal \
+    PGPASSWORD=temporal \
+    PGDATABASE=temporal
+
+# PostgreSQL data
+VOLUME /var/lib/postgresql/
+
+# Use our own wrapper script which runs schema upgrades first
+CMD ["/opt/temporal-postgresql/bin/postgresql.sh"]
diff --git a/apps/temporal-postgresql/bin/apply_migrations.sh b/apps/temporal-postgresql/bin/apply_migrations.sh
new file mode 100755
index 0000000000..27c5fa233f
--- /dev/null
+++ b/apps/temporal-postgresql/bin/apply_migrations.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+# Start PostgreSQL on a temporary port, upgrade both Temporal databases with
+# the versioned schema files via "temporal-sql-tool", then stop PostgreSQL.
+set -eu
+
+MC_POSTGRESQL_BIN_DIR="/usr/lib/postgresql/13/bin/"
+MC_POSTGRESQL_DATA_DIR="/var/lib/postgresql/13/main/"
+MC_POSTGRESQL_CONF_PATH="/etc/postgresql/13/main/postgresql.conf"
+
+# Apply migrations when running on a different port so that clients don't end
+# up connecting in the middle of migrating
+TEMP_PORT=12345
+
+# In case the database is in recovery, wait for up to 1 hour for it to complete
+PGCTL_START_TIMEOUT=3600
+
+# Start PostgreSQL on a temporary port
+"${MC_POSTGRESQL_BIN_DIR}/pg_ctl" \
+    -o "-c config_file=${MC_POSTGRESQL_CONF_PATH} -p ${TEMP_PORT}" \
+    -D "${MC_POSTGRESQL_DATA_DIR}" \
+    -t "${PGCTL_START_TIMEOUT}" \
+    -w \
+    start
+
+# Connect to the temporary port above; an array keeps every argument intact
+TSQL=(
+    temporal-sql-tool --plugin postgres
+    --ep 127.0.0.1 -p "${TEMP_PORT}"
+    -u temporal --pw temporal
+)
+VENDOR_SCHEMA_DIR="/opt/temporal-postgresql/schema/v96"
+
+MAIN_SCHEMA_DIR="${VENDOR_SCHEMA_DIR}/temporal/versioned"
+"${TSQL[@]}" --db temporal update-schema -d "${MAIN_SCHEMA_DIR}"
+
+VISIBILITY_SCHEMA_DIR="${VENDOR_SCHEMA_DIR}/visibility/versioned"
+"${TSQL[@]}" --db temporal_visibility update-schema -d "${VISIBILITY_SCHEMA_DIR}"
+
+# Stop PostgreSQL
+"${MC_POSTGRESQL_BIN_DIR}/pg_ctl" \
+    -D "${MC_POSTGRESQL_DATA_DIR}" \
+    -m fast \
+    -w \
+    stop
diff --git a/apps/temporal-postgresql/bin/initialize_schema.sh b/apps/temporal-postgresql/bin/initialize_schema.sh
new file mode 100755
index 0000000000..df22aba2b3
--- /dev/null
+++ b/apps/temporal-postgresql/bin/initialize_schema.sh
@@ -0,0 +1,98 @@
+#!/bin/bash
+# Creates the "temporal" superuser and both Temporal databases, registers the
+# "default" namespace through a temporarily started Temporal server, and leaves
+# a first-run marker. FIXME share code with "apply_migrations.sh".
+
+set -u
+set -e
+
+MC_POSTGRESQL_BIN_DIR="/usr/lib/postgresql/13/bin/"
+MC_POSTGRESQL_DATA_DIR="/var/lib/postgresql/13/main/"
+MC_POSTGRESQL_CONF_PATH="/etc/postgresql/13/main/postgresql.conf"
+
+# Update memory configuration
+/opt/postgresql-base/bin/update_memory_config.sh
+
+"${MC_POSTGRESQL_BIN_DIR}/pg_ctl" \
+    -o "-c config_file=${MC_POSTGRESQL_CONF_PATH}" \
+    -D "${MC_POSTGRESQL_DATA_DIR}" \
+    -w \
+    -t 1200 \
+    start
+
+psql -v ON_ERROR_STOP=1 -c "CREATE USER temporal WITH PASSWORD 'temporal' SUPERUSER;"
+
+VENDOR_SCHEMA_DIR="/opt/temporal-postgresql/schema/v96"
+TSQL="temporal-sql-tool \
+    --plugin postgres \
+    --ep 127.0.0.1 \
+    -p 5432 \
+    -u temporal \
+    --pw temporal \
+"
+
+MAIN_SCHEMA_DIR="${VENDOR_SCHEMA_DIR}/temporal/versioned"
+$TSQL create --db temporal
+$TSQL --db temporal setup-schema -v 0.0
+$TSQL --db temporal update-schema -d "${MAIN_SCHEMA_DIR}"
+
+VISIBILITY_SCHEMA_DIR="${VENDOR_SCHEMA_DIR}/visibility/versioned"
+$TSQL create --db temporal_visibility
+$TSQL --db temporal_visibility setup-schema -v 0.0
+$TSQL --db temporal_visibility update-schema -d "${VISIBILITY_SCHEMA_DIR}"
+
+# Both listen on localhost and expect to find PostgreSQL locally too
+export MC_TEMPORAL_POSTGRESQL_HOST="127.0.0.1"
+export MC_TEMPORAL_HOST_IP="127.0.0.1"
+
+# Generate final config
+envsubst \
+    < /opt/temporal-server/config/mediacloud_template.yaml \
+    > /opt/temporal-server/config/mediacloud.yaml
+
+# Start the server in the background
+temporal-server --root /opt/temporal-server --env mediacloud start &
+
+# Retry until the server is up and "describe" finds the default namespace
+until tctl --ns default namespace describe < /dev/null; do
+    echo "Default namespace not found. Creating..."
+    sleep 0.2
+
+    # FIXME retention period ("--rd 1") is rather short
+    tctl \
+        --ns default \
+        namespace register \
+        --rd 1 \
+        --desc "Default namespace for Temporal Server" \
+        || echo "Creating default namespace failed."
+
+done
+
+# A freshly registered namespace isn't ready immediately, so give it time to
+# propagate before the temporary server gets killed below
+echo "Waiting for the default namespace to propagate..."
+sleep 30
+
+killall -9 temporal-server
+
+# Stop PostgreSQL
+"${MC_POSTGRESQL_BIN_DIR}/pg_ctl" \
+    -D "${MC_POSTGRESQL_DATA_DIR}" \
+    -m fast \
+    -w \
+    -t 1200 \
+    stop
+
+# Create a file that will denote that we're running off a fresh data volume and
+# it's the first time ever that we've started the server
+cat > /var/lib/postgresql/first_run << EOF
+If this file exists, it means that a fresh data volume was just mounted to the
+container, and the container is about to run for the first time ever, so
+there's no point in attempting to check the schema version and apply
+migrations.
+
+After the first time this container gets run, this file will get deleted and
+every subsequent run of the same container will then attempt to apply
+migrations in order to upgrade the schema before continuing with anything else.
+EOF
+chown postgres:postgres /var/lib/postgresql/first_run
diff --git a/apps/temporal-postgresql/bin/postgresql.sh b/apps/temporal-postgresql/bin/postgresql.sh
new file mode 100755
index 0000000000..4b7af3a946
--- /dev/null
+++ b/apps/temporal-postgresql/bin/postgresql.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+# Entry point: refresh the memory settings, upgrade the Temporal schema unless
+# this is a first run or migrations are disabled, then hand over to PostgreSQL.
+set -eu
+
+/opt/postgresql-base/bin/update_memory_config.sh
+
+FIRST_RUN_MARKER="/var/lib/postgresql/first_run"
+
+if [[ -e "${FIRST_RUN_MARKER}" ]]; then
+    # Marker written by "initialize_schema.sh"; consume it so later runs migrate
+    echo "Skipping schema migrations on first run..."
+    rm "${FIRST_RUN_MARKER}"
+elif [[ -n "${MC_TEMPORAL_SKIP_MIGRATIONS+x}" ]]; then
+    echo "Skipping schema migrations because 'MC_TEMPORAL_SKIP_MIGRATIONS' is set."
+else
+    echo "Applying schema migrations..."
+    /opt/temporal-postgresql/bin/apply_migrations.sh
+    echo "Done applying schema migrations."
+fi
+
+exec /opt/postgresql-base/bin/postgresql.sh
diff --git a/apps/temporal-postgresql/temporal-config b/apps/temporal-postgresql/temporal-config
new file mode 160000
index 0000000000..429e50e8f7
--- /dev/null
+++ b/apps/temporal-postgresql/temporal-config
@@ -0,0 +1 @@
+Subproject commit 429e50e8f728a1ce52a406ee0e114da2b2201ba7
diff --git a/apps/temporal-prometheus/.dockerignore b/apps/temporal-prometheus/.dockerignore
new file mode 100644
index 0000000000..9b2c362a80
--- /dev/null
+++ b/apps/temporal-prometheus/.dockerignore
@@ -0,0 +1,92 @@
+#
+# Files from the build context to be ignored by "docker build".
+#
+# You might want to add as many of constantly changing files here as possible
+# to prevent container's image from getting rebuilt every full moon.
+#
+# Unfortunately, we can't just symlink this file to every app's directory:
+#
+#     https://github.com/moby/moby/issues/12886
+#
+# so for the time being you have to manually copy this file to every app
+# subdirectory:
+#
+#     cd apps/
+#     find . -maxdepth 1 -type d \( ! -name . \) -exec bash -c "cd '{}' && cp ../dockerignore.dist ./.dockerignore" \;
+#
+
+*$py.class
+*.cover
+*.DS_Store
+*.egg
+*.egg-info/
+*.log
+*.manifest
+*.mo
+*.pot
+*.py[cod]
+*.sage.py
+*.so
+*.spec
+*.swp
+*/*.py[cod]
+*/*.swp
+*/*/*.py[cod]
+*/*/*.swp
+*/*/*/*.py[cod]
+*/*/*/*.swp
+*/*/*/__pycache__/
+*/*/__pycache__/
+*/__pycache__/
+._*
+.apdisk
+.AppleDB
+.AppleDesktop
+.AppleDouble
+.cache
+.com.apple.timemachine.donotpresent
+.coverage
+.coverage.*
+.dockerignore
+.DocumentRevisions-V100
+.DS_Store
+.eggs
+.env
+.fseventsd
+.git
+.gitignore
+.hypothesis
+.idea
+.installed.cfg
+.ipynb_checkpoints
+.LSOverride
+.mypy_cache
+.pytest_cache
+.Python
+.python-version
+.ropeproject
+.scrapy
+.Spotlight-V100
+.spyderproject
+.spyproject
+.TemporaryItems
+.tox
+.Trashes
+.venv
+.VolumeIcon.icns
+.webassets-cache
+__pycache__
+celerybeat-schedule
+coverage.xml
+Icon
+local_settings.py
+Network Trash Folder
+nosetests.xml
+parts
+pip-delete-this-directory.txt
+pip-log.txt
+sdist
+Temporary Items
+wheels
+_Inline
+
diff --git a/apps/temporal-prometheus/Dockerfile b/apps/temporal-prometheus/Dockerfile
new file mode 100644
index 0000000000..a935e541fc
--- /dev/null
+++ b/apps/temporal-prometheus/Dockerfile
@@ -0,0 +1,32 @@
+#
+# Prometheus for Temporal stats (runs as the unprivileged "temporal" user)
+#
+
+FROM gcr.io/mcback/base:latest
+
+RUN \
+    mkdir -p /opt/prometheus/ && \
+    /dl_to_stdout.sh "https://github.com/prometheus/prometheus/releases/download/v2.26.0/prometheus-2.26.0.linux-amd64.tar.gz" | \
+        tar -zx -C /opt/prometheus/ --strip 1 && \
+    true
+
+COPY prometheus.yml /opt/prometheus/
+
+# Add unprivileged user the service will run as; it owns the data directory
+RUN \
+    useradd -ms /bin/bash temporal && \
+    mkdir -p /opt/prometheus/data/ && \
+    chown temporal:temporal /opt/prometheus/data/ && \
+    true
+
+WORKDIR /opt/prometheus/
+
+ENV PATH="/opt/prometheus:${PATH}"
+
+EXPOSE 9090
+
+USER temporal
+
+VOLUME /opt/prometheus/data/
+
+CMD ["prometheus"]
diff --git a/apps/temporal-prometheus/prometheus.yml b/apps/temporal-prometheus/prometheus.yml
new file mode 100644
index 0000000000..0a62dfbacb
--- /dev/null
+++ b/apps/temporal-prometheus/prometheus.yml
@@ -0,0 +1,22 @@
+global:
+  scrape_interval: 5s
+  scrape_timeout: 5s
+
+scrape_configs:
+  # Prometheus' own metrics endpoint
+  - job_name: 'prometheus'
+    static_configs:
+      - targets: ['localhost:9090']
+
+  # Metrics endpoints of the individual Temporal services
+  - job_name: 'services'
+    static_configs:
+      - targets:
+          # frontend
+          - 'temporal-server:9091'
+          # matching
+          - 'temporal-server:9092'
+          # history
+          - 'temporal-server:9093'
+          # worker
+          - 'temporal-server:9094'
diff --git a/apps/temporal-server/.dockerignore b/apps/temporal-server/.dockerignore
new file mode 100644
index 0000000000..9b2c362a80
--- /dev/null
+++ b/apps/temporal-server/.dockerignore
@@ -0,0 +1,92 @@
+#
+# Files from the build context to be ignored by "docker build".
+#
+# You might want to add as many of constantly changing files here as possible
+# to prevent container's image from getting rebuilt every full moon.
+#
+# Unfortunately, we can't just symlink this file to every app's directory:
+#
+#     https://github.com/moby/moby/issues/12886
+#
+# so for the time being you have to manually copy this file to every app
+# subdirectory:
+#
+#     cd apps/
+#     find . -maxdepth 1 -type d \( ! -name . \) -exec bash -c "cd '{}' && cp ../dockerignore.dist ./.dockerignore" \;
+#
+
+*$py.class
+*.cover
+*.DS_Store
+*.egg
+*.egg-info/
+*.log
+*.manifest
+*.mo
+*.pot
+*.py[cod]
+*.sage.py
+*.so
+*.spec
+*.swp
+*/*.py[cod]
+*/*.swp
+*/*/*.py[cod]
+*/*/*.swp
+*/*/*/*.py[cod]
+*/*/*/*.swp
+*/*/*/__pycache__/
+*/*/__pycache__/
+*/__pycache__/
+._*
+.apdisk
+.AppleDB
+.AppleDesktop
+.AppleDouble
+.cache
+.com.apple.timemachine.donotpresent
+.coverage
+.coverage.*
+.dockerignore
+.DocumentRevisions-V100
+.DS_Store
+.eggs
+.env
+.fseventsd
+.git
+.gitignore
+.hypothesis
+.idea
+.installed.cfg
+.ipynb_checkpoints
+.LSOverride
+.mypy_cache
+.pytest_cache
+.Python
+.python-version
+.ropeproject
+.scrapy
+.Spotlight-V100
+.spyderproject
+.spyproject
+.TemporaryItems
+.tox
+.Trashes
+.venv
+.VolumeIcon.icns
+.webassets-cache
+__pycache__
+celerybeat-schedule
+coverage.xml
+Icon
+local_settings.py
+Network Trash Folder
+nosetests.xml
+parts
+pip-delete-this-directory.txt
+pip-log.txt
+sdist
+Temporary Items
+wheels
+_Inline
+
diff --git a/apps/temporal-server/Dockerfile b/apps/temporal-server/Dockerfile
new file mode 100644
index 0000000000..3fb7674dca
--- /dev/null
+++ b/apps/temporal-server/Dockerfile
@@ -0,0 +1,76 @@
+#
+# Temporal server
+#
+
+FROM gcr.io/mcback/base:latest
+
+# Install dependencies
+RUN \
+    apt-get -y --no-install-recommends install \
+        libprotobuf17 \
+    && \
+    true
+
+# Install Temporal server
+RUN \
+    # Keep version that's being used in sync with temporal-postgresql
+    mkdir -p /var/tmp/temporal/ && \
+    /dl_to_stdout.sh "https://github.com/temporalio/temporal/releases/download/v1.9.2/temporal_1.9.2_linux_amd64.tar.gz" | \
+        tar -zx -C /var/tmp/temporal/ && \
+    mv /var/tmp/temporal/temporal-server /var/tmp/temporal/tctl /usr/bin/ && \
+    cd / && \
+    rm -rf /var/tmp/temporal/ && \
+    true
+
+RUN \
+    #
+    # Install envsubst for generating configuration
+    apt-get -y --no-install-recommends install \
+        gettext-base \
+    && \
+    #
+    # Install utilities useful for tctl
+    apt-get -y --no-install-recommends install \
+        jq \
+    && \
+    #
+    # Add unprivileged user the service will run as
+    useradd -ms /bin/bash temporal && \
+    #
+    # Directory for wrapper scripts
+    mkdir -p /opt/temporal-server/bin/ && \
+    #
+    # Directory for configuration (has to be writable to generate final
+    # configuration files from templates)
+    mkdir -p /opt/temporal-server/config/ && \
+    chown temporal:temporal /opt/temporal-server/config/ && \
+    #
+    # Directories for workflow archival, owned by the service user
+    mkdir -p \
+        /var/lib/temporal/archival/temporal/ \
+        /var/lib/temporal/archival/visibility/ \
+    && \
+    chown -R temporal:temporal /var/lib/temporal/ && \
+    #
+    true
+
+COPY bin/* /opt/temporal-server/bin/
+COPY config/* /opt/temporal-server/config/
+
+ENV PATH="/opt/temporal-server/bin:${PATH}" \
+    # Defaults for tctl: https://docs.temporal.io/docs/tctl/#environment-variables
+    TEMPORAL_CLI_ADDRESS="temporal-server:7233" \
+    TEMPORAL_CLI_NAMESPACE="default"
+
+# Archives (contains the archival directories created above)
+VOLUME /var/lib/temporal/
+
+EXPOSE \
+    # Port descriptions: https://docs.temporal.io/docs/server-architecture/
+    6933 6934 6935 6939 7233 7234 7235 7239 \
+    # Prometheus endpoints
+    9091 9092 9093 9094
+
+USER temporal
+
+CMD ["temporal.sh"]
diff --git a/apps/temporal-server/bin/temporal.sh b/apps/temporal-server/bin/temporal.sh
new file mode 100755
index 0000000000..31cf01edaf
--- /dev/null
+++ b/apps/temporal-server/bin/temporal.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+# Render the final configuration, wait until PostgreSQL and Elasticsearch are
+# reachable, then replace this shell with "temporal-server".
+set -eu
+
+export MC_TEMPORAL_POSTGRESQL_HOST="temporal-postgresql"
+
+# Bind address; assigned before "export" so a failing "hostname -i" is fatal
+MC_TEMPORAL_HOST_IP="$(hostname -i)"
+export MC_TEMPORAL_HOST_IP
+
+# Generate final config
+envsubst \
+    < /opt/temporal-server/config/mediacloud_template.yaml \
+    > /opt/temporal-server/config/mediacloud.yaml
+
+# FIXME give up and crash after a while
+while true; do
+    echo "Waiting for PostgreSQL to start..."
+    if nc -z -w 10 temporal-postgresql 5432; then
+        break
+    else
+        sleep 1
+    fi
+done
+
+while true; do
+    echo "Waiting for Elasticsearch to start..."
+    if curl --silent --show-error --fail "http://temporal-elasticsearch:9200/_cluster/health"; then
+        break
+    else
+        sleep 1
+    fi
+done
+
+# FIXME perhaps run all four services ("frontend", "history", "matching", "worker")
+# as separate containers?
+exec temporal-server \
+    --root /opt/temporal-server \
+    --env mediacloud \
+    start
diff --git a/apps/temporal-server/config b/apps/temporal-server/config
new file mode 160000
index 0000000000..429e50e8f7
--- /dev/null
+++ b/apps/temporal-server/config
@@ -0,0 +1 @@
+Subproject commit 429e50e8f728a1ce52a406ee0e114da2b2201ba7
diff --git a/apps/temporal-server/docker-compose.tests.yml b/apps/temporal-server/docker-compose.tests.yml
new file mode 100644
index 0000000000..89ab2f7968
--- /dev/null
+++ b/apps/temporal-server/docker-compose.tests.yml
@@ -0,0 +1,194 @@
+version: "3.7"
+
+services:
+
+    # Service to use for testing the Temporal service
+    #
+    # Usage:
+    #
+    #     host$ ./dev/run.py temporal-server bash
+    #     container$ python3
+    #
+    #     ...and then submit a Temporal workflow somehow.
+    #
+    temporal-server:
+        image: gcr.io/mcback/common:latest
+        init: true
+        stop_signal: SIGKILL
+        depends_on:
+            - temporal-server-actual
+            - temporal-webapp
+
+    # Actual Temporal server, operating under "temporal-server" alias
+    temporal-server-actual:
+        image: gcr.io/mcback/temporal-server:latest
+        init: true
+        stop_signal: SIGKILL
+        depends_on:
+            - temporal-postgresql
+            - temporal-elasticsearch
+            - temporal-prometheus
+        networks:
+            default:
+                aliases:
+                    - temporal-server
+        expose:
+            - "6933"
+            - "6934"
+            - "6935"
+            - "6939"
+            - "7233"
+            - "7234"
+            - "7235"
+            - "7239"
+            - "9091"
+            - "9092"
+            - "9093"
+            - "9094"
+        ports:
+            # Expose to host for debugging
+            - "6933:6933"
+            - "6934:6934"
+            - "6935:6935"
+            - "6939:6939"
+            - "7233:7233"
+            - "7234:7234"
+            - "7235:7235"
+            - "7239:7239"
+            - "9091:9091"
+            - "9092:9092"
+            - "9093:9093"
+            - "9094:9094"
+        volumes:
+            - type: bind
+              source: ./bin/
+              target: /opt/temporal-server/bin/
+            - type: bind
+              source: ./config/dynamicconfig.yaml
+              target: /opt/temporal-server/config/dynamicconfig.yaml
+            - type: bind
+              source: ./config/mediacloud_template.yaml
+              target: /opt/temporal-server/config/mediacloud_template.yaml
+
+    temporal-postgresql:
+        image: gcr.io/mcback/temporal-postgresql:latest
+        init: true
+        stop_signal: SIGKILL
+        networks:
+            - default
+        expose:
+            - "5432"
+        ports:
+            # Expose to host for debugging
+            - "5432:5432"
+        volumes:
+            - type: bind
+              source: ./../temporal-postgresql/bin/
+              target: /opt/temporal-postgresql/bin/
+            - type: bind
+              source: ./../postgresql-base/conf/
+              target: /etc/postgresql/13/main/
+
+    temporal-elasticsearch:
+        image: gcr.io/mcback/temporal-elasticsearch:latest
+        init: true
+        stop_signal: SIGKILL
+        networks:
+            - default
+        expose:
+            - "9200"
+            - "9300"
+        ports:
+            # Expose to host for debugging
+            - "9200:9200"
+            - "9300:9300"
+        volumes:
+            - type: bind
+              source: ./../elasticsearch-base/bin/elasticsearch.sh
+              target: /opt/elasticsearch/bin/elasticsearch.sh
+            # Not mounting config as it gets concatenated into a single file
+        # Limit CPUs and RAM for the process to not get too greedy
+        deploy:
+            resources:
+                limits:
+                    cpus: "2"
+                    memory: "2G"
+
+    temporal-prometheus:
+        image: gcr.io/mcback/temporal-prometheus:latest
+        init: true
+        stop_signal: SIGKILL
+        depends_on:
+            - temporal-grafana
+        networks:
+            - default
+        expose:
+            - "9090"
+        ports:
+            # Expose to host for debugging
+            - "9090:9090"
+        volumes:
+            - type: bind
+              source: ./../temporal-prometheus/prometheus.yml
+              target: /opt/prometheus/prometheus.yml
+        # Limit CPUs and RAM for the process to not get too greedy
+        deploy:
+            resources:
+                limits:
+                    cpus: "2"
+                    memory: "2G"
+
+    temporal-grafana:
+        image: gcr.io/mcback/temporal-grafana:latest
+        init: true
+        stop_signal: SIGKILL
+        networks:
+            - default
+        expose:
+            - "3000"
+        ports:
+            # Expose to host for debugging
+            - "3000:3000"
+        volumes:
+            - type: bind
+              source: ./../temporal-grafana/grafana.ini
+              target: /opt/grafana/conf/grafana.ini
+            - type: bind
+              source: ./../temporal-grafana/provisioning/
+              target: /opt/grafana/provisioning/
+            - type: bind
+              source: ./../temporal-grafana/dashboards/dashboards/
+              target: /opt/grafana/dashboards/
+        # Limit CPUs and RAM for the process to not get too greedy
+        deploy:
+            resources:
+                limits:
+                    cpus: "2"
+                    memory: "2G"
+
+    temporal-webapp:
+        image: gcr.io/mcback/temporal-webapp:latest
+        init: true
+        stop_signal: SIGKILL
+        networks:
+            - default
+        expose:
+            - "8088"
+        ports:
+            # Expose to host for debugging
+            - "8088:8088"
+        # Limit CPUs and RAM for the process to not get too greedy
+        deploy:
+            resources:
+                limits:
+                    cpus: "1"
+                    memory: "2G"
+
+networks:
+    default:
+        attachable: true
+        ipam:
+            driver: default
+            config:
+                # Use same subnet as in production
+                - subnet: "10.1.0.0/16"
diff --git a/apps/temporal-webapp/.dockerignore b/apps/temporal-webapp/.dockerignore
new file mode 100644
index 0000000000..9b2c362a80
--- /dev/null
+++ b/apps/temporal-webapp/.dockerignore
@@ -0,0 +1,92 @@
+#
+# Files from the build context to be ignored by "docker build".
+#
+# You might want to add as many of constantly changing files here as possible
+# to prevent container's image from getting rebuilt every full moon.
+#
+# Unfortunately, we can't just symlink this file to every app's directory:
+#
+#     https://github.com/moby/moby/issues/12886
+#
+# so for the time being you have to manually copy this file to every app
+# subdirectory:
+#
+#     cd apps/
+#     find . -maxdepth 1 -type d \( ! -name . \) -exec bash -c "cd '{}' && cp ../dockerignore.dist ./.dockerignore" \;
+#
+
+*$py.class
+*.cover
+*.DS_Store
+*.egg
+*.egg-info/
+*.log
+*.manifest
+*.mo
+*.pot
+*.py[cod]
+*.sage.py
+*.so
+*.spec
+*.swp
+*/*.py[cod]
+*/*.swp
+*/*/*.py[cod]
+*/*/*.swp
+*/*/*/*.py[cod]
+*/*/*/*.swp
+*/*/*/__pycache__/
+*/*/__pycache__/
+*/__pycache__/
+._*
+.apdisk
+.AppleDB
+.AppleDesktop
+.AppleDouble
+.cache
+.com.apple.timemachine.donotpresent
+.coverage
+.coverage.*
+.dockerignore
+.DocumentRevisions-V100
+.DS_Store
+.eggs
+.env
+.fseventsd
+.git
+.gitignore
+.hypothesis
+.idea
+.installed.cfg
+.ipynb_checkpoints
+.LSOverride
+.mypy_cache
+.pytest_cache
+.Python
+.python-version
+.ropeproject
+.scrapy
+.Spotlight-V100
+.spyderproject
+.spyproject
+.TemporaryItems
+.tox
+.Trashes
+.venv
+.VolumeIcon.icns
+.webassets-cache
+__pycache__
+celerybeat-schedule
+coverage.xml
+Icon
+local_settings.py
+Network Trash Folder
+nosetests.xml
+parts
+pip-delete-this-directory.txt
+pip-log.txt
+sdist
+Temporary Items
+wheels
+_Inline
+
diff --git a/apps/temporal-webapp/Dockerfile b/apps/temporal-webapp/Dockerfile
new file mode 100644
index 0000000000..3f475a85b6
--- /dev/null
+++ b/apps/temporal-webapp/Dockerfile
@@ -0,0 +1,82 @@
+#
+# Temporal webapp
+#
+
+FROM gcr.io/mcback/base:latest
+
+RUN \
+    #
+    # Add NodeSource APT repository
+    curl -fsSL https://deb.nodesource.com/gpgkey/nodesource.gpg.key | apt-key add - && \
+    echo "deb https://deb.nodesource.com/node_14.x focal main" \
+        > /etc/apt/sources.list.d/nodesource.list && \
+    apt-get -y update && \
+    #
+    # Install Node.js
+    apt-get -y --no-install-recommends install nodejs && \
+    #
+    true
+
+# FIXME Vue.js still gets built in development mode
+ENV NODE_ENV=production \
+    NPM_CONFIG_PRODUCTION=true \
+    TEMPORAL_GRPC_ENDPOINT=temporal-server:7233 \
+    TEMPORAL_PERMIT_WRITE_API=true
+
+RUN \
+    #
+    # Install build dependencies
+    apt-get -y --no-install-recommends install git && \
+    #
+    # Create target directory
+    mkdir -p /opt/temporal-webapp/ && \
+    #
+    # Download Temporal webapp
+    # * We use Git instead of building a released package because we need
+    #   the submodules for the build too;
+    # * We check out a specific commit hash instead of a version tag to prevent
+    #   dependency confusion
+    #   (https://medium.com/@alex.birsan/dependency-confusion-4a5d60fec610);
+    # * We do some extra trickery to do a shallow copy of just a single commit
+    #   hash to save space + time (https://stackoverflow.com/a/43136160/200603);
+    # * Submodule is referred to by an SSH URI, which gets rewritten to HTTPS
+    #   below because SSH checkouts don't work from the build container.
+    #
+    cd /opt/temporal-webapp/ && \
+    git init && \
+    git remote add origin https://github.com/temporalio/web.git && \
+    # HEAD of "v1.9.0" tag:
+    git fetch --depth 1 origin 6ed16d0dc07b4baf43e091028d98fa1fe7a29c06 && \
+    git checkout FETCH_HEAD && \
+    # SSH checkout doesn't work with the build container's public key not
+    # registered with GitHub
+    sed -i 's/git@github.com:/https:\/\/github.com\//g' .gitmodules && \
+    git submodule init && \
+    git submodule sync && \
+    git submodule update --init --recursive --depth 1 && \
+    #
+    # Build the webapp
+    npm install --production && \
+    npm run build-production && \
+    #
+    # Remove build dependencies
+    apt-get -y remove git && \
+    apt-get -y autoremove && \
+    apt-get -y clean && \
+    #
+    # Remove Git history as we won't need it
+    rm -rf .git/ && \
+    #
+    # Add unprivileged user the service will run as
+    useradd -ms /bin/bash temporal && \
+    #
+    true
+
+WORKDIR /opt/temporal-webapp/
+
+# Webapp port
+EXPOSE 8088
+
+USER temporal
+
+CMD ["node", "server.js"]
diff --git a/apps/tools/.idea/mediawords.sql b/apps/tools/.idea/mediawords.sql
new file mode 120000
index 0000000000..08fc9a64b9
--- /dev/null
+++ b/apps/tools/.idea/mediawords.sql
@@ -0,0 +1 @@
+../../postgresql-server/schema/mediawords.sql
\ No newline at end of file
diff --git a/apps/tools/.idea/sqlDataSources.xml b/apps/tools/.idea/sqlDataSources.xml
new file mode 100644
index 0000000000..006e38938a
--- /dev/null
+++ b/apps/tools/.idea/sqlDataSources.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="SqlDataSourceStorage">
+    <option name="dataSources">
+      <list>
+        <State>
+          <option name="id" value="4c63930e-e52f-431d-b26e-c847d86ef61d" />
+          <option name="name" value="DDL data source" />
+          <option name="urls">
+            <array>
+              <option value="file://$PROJECT_DIR$/.idea/mediawords.sql" />
+            </array>
+          </option>
+        </State>
+      </list>
+    </option>
+  </component>
+</project>
\ No newline at end of file
diff --git a/apps/tools/docker-compose.tests.yml b/apps/tools/docker-compose.tests.yml
index 109d12142b..5c3191140b 100644
--- a/apps/tools/docker-compose.tests.yml
+++ b/apps/tools/docker-compose.tests.yml
@@ -45,8 +45,8 @@ services:
               source: ./../postgresql-server/schema/
               target: /opt/mediacloud/schema/
             - type: bind
-              source: ./../postgresql-server/conf/
-              target: /etc/postgresql/11/main/
+              source: ./../postgresql-base/conf/
+              target: /etc/postgresql/13/main/
 
     solr-shard-01:
         image: gcr.io/mcback/solr-shard:latest
diff --git a/apps/topics-base/.idea/mediawords.sql b/apps/topics-base/.idea/mediawords.sql
new file mode 120000
index 0000000000..08fc9a64b9
--- /dev/null
+++ b/apps/topics-base/.idea/mediawords.sql
@@ -0,0 +1 @@
+../../postgresql-server/schema/mediawords.sql
\ No newline at end of file
diff --git a/apps/topics-base/.idea/sqlDataSources.xml b/apps/topics-base/.idea/sqlDataSources.xml
new file mode 100644
index 0000000000..944fa920f0
--- /dev/null
+++ b/apps/topics-base/.idea/sqlDataSources.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="SqlDataSourceStorage">
+    <option name="dataSources">
+      <list>
+        <State>
+          <option name="id" value="553665fb-a5e4-4bb2-88af-fdf81566f189" />
+          <option name="name" value="DDL data source" />
+          <option name="urls">
+            <array>
+              <option value="file://$PROJECT_DIR$/.idea/mediawords.sql" />
+            </array>
+          </option>
+        </State>
+      </list>
+    </option>
+  </component>
+</project>
\ No newline at end of file
diff --git a/apps/topics-base/docker-compose.tests.yml b/apps/topics-base/docker-compose.tests.yml
index 63dcc58b7d..929cfda493 100644
--- a/apps/topics-base/docker-compose.tests.yml
+++ b/apps/topics-base/docker-compose.tests.yml
@@ -93,8 +93,8 @@ services:
               source: ./../postgresql-server/schema/
               target: /opt/mediacloud/schema/
             - type: bind
-              source: ./../postgresql-server/conf/
-              target: /etc/postgresql/11/main/
+              source: ./../postgresql-base/conf/
+              target: /etc/postgresql/13/main/
 
     rabbitmq-server:
         image: gcr.io/mcback/rabbitmq-server:latest
diff --git a/apps/topics-extract-story-links/.idea/mediawords.sql b/apps/topics-extract-story-links/.idea/mediawords.sql
new file mode 120000
index 0000000000..08fc9a64b9
--- /dev/null
+++ b/apps/topics-extract-story-links/.idea/mediawords.sql
@@ -0,0 +1 @@
+../../postgresql-server/schema/mediawords.sql
\ No newline at end of file
diff --git a/apps/topics-extract-story-links/.idea/misc.xml b/apps/topics-extract-story-links/.idea/misc.xml
index feac02deed..2d65e1c063 100644
--- a/apps/topics-extract-story-links/.idea/misc.xml
+++ b/apps/topics-extract-story-links/.idea/misc.xml
@@ -6,5 +6,5 @@
   <component name="NodePackageJsonFileManager">
     <packageJsonPaths />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.7.3 Docker Compose (topics-extract-story-links at [/Users/pypt/Dropbox/etc-MediaCloud/trunk/apps/topics-extract-story-links/docker-compose.tests.yml])" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.8.5 Docker Compose (topics-extract-story-links at [/home/pypt/m/apps/topics-extract-story-links/docker-compose.tests.yml])" project-jdk-type="Python SDK" />
 </project>
\ No newline at end of file
diff --git a/apps/topics-extract-story-links/.idea/sqlDataSources.xml b/apps/topics-extract-story-links/.idea/sqlDataSources.xml
new file mode 100644
index 0000000000..bdf32c2882
--- /dev/null
+++ b/apps/topics-extract-story-links/.idea/sqlDataSources.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="SqlDataSourceStorage">
+    <option name="dataSources">
+      <list>
+        <State>
+          <option name="id" value="2cff7c51-ba33-4b12-a07e-81cf24df45db" />
+          <option name="name" value="DDL data source" />
+          <option name="urls">
+            <array>
+              <option value="file://$PROJECT_DIR$/.idea/mediawords.sql" />
+            </array>
+          </option>
+        </State>
+      </list>
+    </option>
+  </component>
+</project>
\ No newline at end of file
diff --git a/apps/topics-extract-story-links/.idea/topics-extract-story-links.iml b/apps/topics-extract-story-links/.idea/topics-extract-story-links.iml
index f750ca6520..09997bb235 100644
--- a/apps/topics-extract-story-links/.idea/topics-extract-story-links.iml
+++ b/apps/topics-extract-story-links/.idea/topics-extract-story-links.iml
@@ -2,7 +2,7 @@
 <module type="PYTHON_MODULE" version="4">
   <component name="NewModuleRootManager">
     <content url="file://$MODULE_DIR$" />
-    <orderEntry type="jdk" jdkName="Remote Python 3.7.3 Docker Compose (topics-extract-story-links at [/Users/pypt/Dropbox/etc-MediaCloud/trunk/apps/topics-extract-story-links/docker-compose.tests.yml])" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Remote Python 3.8.5 Docker Compose (topics-extract-story-links at [/home/pypt/m/apps/topics-extract-story-links/docker-compose.tests.yml])" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PackageRequirementsSettings">
diff --git a/apps/topics-extract-story-links/docker-compose.tests.yml b/apps/topics-extract-story-links/docker-compose.tests.yml
index 80d21f6a60..380509c5f1 100644
--- a/apps/topics-extract-story-links/docker-compose.tests.yml
+++ b/apps/topics-extract-story-links/docker-compose.tests.yml
@@ -75,5 +75,5 @@ services:
               source: ./../postgresql-server/schema/
               target: /opt/mediacloud/schema/
             - type: bind
-              source: ./../postgresql-server/conf/
-              target: /etc/postgresql/11/main/
+              source: ./../postgresql-base/conf/
+              target: /etc/postgresql/13/main/
diff --git a/apps/topics-fetch-link/.idea/mediawords.sql b/apps/topics-fetch-link/.idea/mediawords.sql
new file mode 120000
index 0000000000..08fc9a64b9
--- /dev/null
+++ b/apps/topics-fetch-link/.idea/mediawords.sql
@@ -0,0 +1 @@
+../../postgresql-server/schema/mediawords.sql
\ No newline at end of file
diff --git a/apps/topics-fetch-link/.idea/misc.xml b/apps/topics-fetch-link/.idea/misc.xml
index d17c2acb4c..1fcaee9c6f 100644
--- a/apps/topics-fetch-link/.idea/misc.xml
+++ b/apps/topics-fetch-link/.idea/misc.xml
@@ -6,5 +6,5 @@
   <component name="NodePackageJsonFileManager">
     <packageJsonPaths />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.7.3 Docker Compose (topics-fetch-link at [/Users/pypt/Dropbox/etc-MediaCloud/trunk/apps/topics-fetch-link/docker-compose.tests.yml])" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.8.5 Docker Compose (topics-fetch-link at [/home/pypt/m/apps/topics-fetch-link/docker-compose.tests.yml])" project-jdk-type="Python SDK" />
 </project>
\ No newline at end of file
diff --git a/apps/topics-fetch-link/.idea/sqlDataSources.xml b/apps/topics-fetch-link/.idea/sqlDataSources.xml
new file mode 100644
index 0000000000..7cc1658bf3
--- /dev/null
+++ b/apps/topics-fetch-link/.idea/sqlDataSources.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="SqlDataSourceStorage">
+    <option name="dataSources">
+      <list>
+        <State>
+          <option name="id" value="a5f854e3-7e45-4163-b6ab-6ae47802b26c" />
+          <option name="name" value="DDL data source" />
+          <option name="urls">
+            <array>
+              <option value="file://$PROJECT_DIR$/.idea/mediawords.sql" />
+            </array>
+          </option>
+        </State>
+      </list>
+    </option>
+  </component>
+</project>
\ No newline at end of file
diff --git a/apps/topics-fetch-link/.idea/topics-fetch-link.iml b/apps/topics-fetch-link/.idea/topics-fetch-link.iml
index 0d2b068e16..681b64f8cc 100644
--- a/apps/topics-fetch-link/.idea/topics-fetch-link.iml
+++ b/apps/topics-fetch-link/.idea/topics-fetch-link.iml
@@ -2,7 +2,7 @@
 <module type="PYTHON_MODULE" version="4">
   <component name="NewModuleRootManager">
     <content url="file://$MODULE_DIR$" />
-    <orderEntry type="jdk" jdkName="Remote Python 3.7.3 Docker Compose (topics-fetch-link at [/Users/pypt/Dropbox/etc-MediaCloud/trunk/apps/topics-fetch-link/docker-compose.tests.yml])" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Remote Python 3.8.5 Docker Compose (topics-fetch-link at [/home/pypt/m/apps/topics-fetch-link/docker-compose.tests.yml])" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PackageRequirementsSettings">
diff --git a/apps/topics-fetch-link/docker-compose.tests.yml b/apps/topics-fetch-link/docker-compose.tests.yml
index e5f84171bf..c4b500e35e 100644
--- a/apps/topics-fetch-link/docker-compose.tests.yml
+++ b/apps/topics-fetch-link/docker-compose.tests.yml
@@ -93,8 +93,8 @@ services:
               source: ./../postgresql-server/schema/
               target: /opt/mediacloud/schema/
             - type: bind
-              source: ./../postgresql-server/conf/
-              target: /etc/postgresql/11/main/
+              source: ./../postgresql-base/conf/
+              target: /etc/postgresql/13/main/
 
     rabbitmq-server:
         image: gcr.io/mcback/rabbitmq-server:latest
diff --git a/apps/topics-fetch-twitter-urls/.idea/mediawords.sql b/apps/topics-fetch-twitter-urls/.idea/mediawords.sql
new file mode 120000
index 0000000000..08fc9a64b9
--- /dev/null
+++ b/apps/topics-fetch-twitter-urls/.idea/mediawords.sql
@@ -0,0 +1 @@
+../../postgresql-server/schema/mediawords.sql
\ No newline at end of file
diff --git a/apps/topics-fetch-twitter-urls/.idea/misc.xml b/apps/topics-fetch-twitter-urls/.idea/misc.xml
index 2a84bb3ad3..1541a3b4a0 100644
--- a/apps/topics-fetch-twitter-urls/.idea/misc.xml
+++ b/apps/topics-fetch-twitter-urls/.idea/misc.xml
@@ -6,5 +6,5 @@
   <component name="NodePackageJsonFileManager">
     <packageJsonPaths />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.7.3 Docker Compose (topics-fetch-twitter-urls at [/Users/pypt/Dropbox/etc-MediaCloud/trunk/apps/topics-fetch-twitter-urls/docker-compose.tests.yml])" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.8.5 Docker Compose (topics-fetch-twitter-urls at [/home/pypt/m/apps/topics-fetch-twitter-urls/docker-compose.tests.yml])" project-jdk-type="Python SDK" />
 </project>
\ No newline at end of file
diff --git a/apps/topics-fetch-twitter-urls/.idea/sqlDataSources.xml b/apps/topics-fetch-twitter-urls/.idea/sqlDataSources.xml
new file mode 100644
index 0000000000..823ea3ad86
--- /dev/null
+++ b/apps/topics-fetch-twitter-urls/.idea/sqlDataSources.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="SqlDataSourceStorage">
+    <option name="dataSources">
+      <list>
+        <State>
+          <option name="id" value="9b26f133-0b7c-4bb2-ad93-5c610079881e" />
+          <option name="name" value="DDL data source" />
+          <option name="urls">
+            <array>
+              <option value="file://$PROJECT_DIR$/.idea/mediawords.sql" />
+            </array>
+          </option>
+        </State>
+      </list>
+    </option>
+  </component>
+</project>
\ No newline at end of file
diff --git a/apps/topics-fetch-twitter-urls/.idea/topics-fetch-twitter-urls.iml b/apps/topics-fetch-twitter-urls/.idea/topics-fetch-twitter-urls.iml
index 30a0de4fb1..3a6d60fde3 100644
--- a/apps/topics-fetch-twitter-urls/.idea/topics-fetch-twitter-urls.iml
+++ b/apps/topics-fetch-twitter-urls/.idea/topics-fetch-twitter-urls.iml
@@ -2,7 +2,7 @@
 <module type="PYTHON_MODULE" version="4">
   <component name="NewModuleRootManager">
     <content url="file://$MODULE_DIR$" />
-    <orderEntry type="jdk" jdkName="Remote Python 3.7.3 Docker Compose (topics-fetch-twitter-urls at [/Users/pypt/Dropbox/etc-MediaCloud/trunk/apps/topics-fetch-twitter-urls/docker-compose.tests.yml])" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Remote Python 3.8.5 Docker Compose (topics-fetch-twitter-urls at [/home/pypt/m/apps/topics-fetch-twitter-urls/docker-compose.tests.yml])" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PackageRequirementsSettings">
diff --git a/apps/topics-fetch-twitter-urls/docker-compose.tests.yml b/apps/topics-fetch-twitter-urls/docker-compose.tests.yml
index 63fe1cd6d4..74bd3fd650 100644
--- a/apps/topics-fetch-twitter-urls/docker-compose.tests.yml
+++ b/apps/topics-fetch-twitter-urls/docker-compose.tests.yml
@@ -97,8 +97,8 @@ services:
               source: ./../postgresql-server/schema/
               target: /opt/mediacloud/schema/
             - type: bind
-              source: ./../postgresql-server/conf/
-              target: /etc/postgresql/11/main/
+              source: ./../postgresql-base/conf/
+              target: /etc/postgresql/13/main/
 
     rabbitmq-server:
         image: gcr.io/mcback/rabbitmq-server:latest
diff --git a/apps/topics-map/.idea/mediawords.sql b/apps/topics-map/.idea/mediawords.sql
new file mode 120000
index 0000000000..08fc9a64b9
--- /dev/null
+++ b/apps/topics-map/.idea/mediawords.sql
@@ -0,0 +1 @@
+../../postgresql-server/schema/mediawords.sql
\ No newline at end of file
diff --git a/apps/topics-map/.idea/sqlDataSources.xml b/apps/topics-map/.idea/sqlDataSources.xml
new file mode 100644
index 0000000000..b8c792255c
--- /dev/null
+++ b/apps/topics-map/.idea/sqlDataSources.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="SqlDataSourceStorage">
+    <option name="dataSources">
+      <list>
+        <State>
+          <option name="id" value="a41645c8-11a2-4094-9efd-686d6d626861" />
+          <option name="name" value="DDL data source" />
+          <option name="urls">
+            <array>
+              <option value="file://$PROJECT_DIR$/.idea/mediawords.sql" />
+            </array>
+          </option>
+        </State>
+      </list>
+    </option>
+  </component>
+</project>
\ No newline at end of file
diff --git a/apps/topics-map/Dockerfile b/apps/topics-map/Dockerfile
index 208b99b8fd..3b61e8bd37 100644
--- a/apps/topics-map/Dockerfile
+++ b/apps/topics-map/Dockerfile
@@ -5,7 +5,10 @@
 FROM gcr.io/mcback/common:latest
 
 # Install Java
-RUN apt-get -y --no-install-recommends install openjdk-8-jre-headless
+RUN \
+    apt-get -y update && \
+    apt-get -y --no-install-recommends install openjdk-8-jre-headless && \
+    true
 
 # Install fa2l Java libs
 RUN \
diff --git a/apps/topics-map/docker-compose.tests.yml b/apps/topics-map/docker-compose.tests.yml
index 82438fcb22..d7f3d0ac2c 100644
--- a/apps/topics-map/docker-compose.tests.yml
+++ b/apps/topics-map/docker-compose.tests.yml
@@ -55,6 +55,5 @@ services:
               source: ./../postgresql-server/schema/
               target: /opt/mediacloud/schema/
             - type: bind
-              source: ./../postgresql-server/conf/
-              target: /etc/postgresql/11/main/
-
+              source: ./../postgresql-base/conf/
+              target: /etc/postgresql/13/main/
diff --git a/apps/topics-mine/docker-compose.tests.yml b/apps/topics-mine/docker-compose.tests.yml
index ed2e90a371..20562aa1ee 100644
--- a/apps/topics-mine/docker-compose.tests.yml
+++ b/apps/topics-mine/docker-compose.tests.yml
@@ -112,8 +112,8 @@ services:
               source: ./../postgresql-server/schema/
               target: /opt/mediacloud/schema/
             - type: bind
-              source: ./../postgresql-server/conf/
-              target: /etc/postgresql/11/main/
+              source: ./../postgresql-base/conf/
+              target: /etc/postgresql/13/main/
 
     rabbitmq-server:
         image: gcr.io/mcback/rabbitmq-server:latest
diff --git a/apps/topics-mine/src/python/topics_mine/posts/__init__.py b/apps/topics-mine/src/python/topics_mine/posts/__init__.py
index 4465abaf93..1b62e05a95 100644
--- a/apps/topics-mine/src/python/topics_mine/posts/__init__.py
+++ b/apps/topics-mine/src/python/topics_mine/posts/__init__.py
@@ -27,7 +27,7 @@ def fetch_posts_from_api(
         sample: Optional[int] = None,
         page_size: Optional[int] = None,
     ) -> list:
-        raise NotImplemented("Abstract method")
+        raise NotImplementedError("Abstract method")
 
     def validate_mock_post(self, got_post: dict, expected_post: dict) -> None:
         """Validate that got_post matches expected_post.
diff --git a/apps/topics-snapshot/.idea/mediawords.sql b/apps/topics-snapshot/.idea/mediawords.sql
new file mode 120000
index 0000000000..08fc9a64b9
--- /dev/null
+++ b/apps/topics-snapshot/.idea/mediawords.sql
@@ -0,0 +1 @@
+../../postgresql-server/schema/mediawords.sql
\ No newline at end of file
diff --git a/apps/topics-snapshot/.idea/misc.xml b/apps/topics-snapshot/.idea/misc.xml
index 3a0005b7b1..ef39d9adbd 100644
--- a/apps/topics-snapshot/.idea/misc.xml
+++ b/apps/topics-snapshot/.idea/misc.xml
@@ -6,5 +6,5 @@
   <component name="NodePackageJsonFileManager">
     <packageJsonPaths />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.7.3 Docker Compose (topics-snapshot at [/Users/pypt/Dropbox/etc-MediaCloud/trunk/apps/topics-snapshot/docker-compose.tests.yml])" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.8.5 Docker Compose (topics-snapshot at [/home/pypt/m/apps/topics-snapshot/docker-compose.tests.yml])" project-jdk-type="Python SDK" />
 </project>
\ No newline at end of file
diff --git a/apps/topics-snapshot/.idea/sqlDataSources.xml b/apps/topics-snapshot/.idea/sqlDataSources.xml
new file mode 100644
index 0000000000..bea78ab59b
--- /dev/null
+++ b/apps/topics-snapshot/.idea/sqlDataSources.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="SqlDataSourceStorage">
+    <option name="dataSources">
+      <list>
+        <State>
+          <option name="id" value="3dc3617f-f12c-42d5-b17c-d034554bd192" />
+          <option name="name" value="DDL data source" />
+          <option name="urls">
+            <array>
+              <option value="file://$PROJECT_DIR$/.idea/mediawords.sql" />
+            </array>
+          </option>
+        </State>
+      </list>
+    </option>
+  </component>
+</project>
\ No newline at end of file
diff --git a/apps/topics-snapshot/.idea/topics-snapshot.iml b/apps/topics-snapshot/.idea/topics-snapshot.iml
index b92bf463b9..64cb979160 100644
--- a/apps/topics-snapshot/.idea/topics-snapshot.iml
+++ b/apps/topics-snapshot/.idea/topics-snapshot.iml
@@ -2,7 +2,7 @@
 <module type="PYTHON_MODULE" version="4">
   <component name="NewModuleRootManager">
     <content url="file://$MODULE_DIR$" />
-    <orderEntry type="jdk" jdkName="Remote Python 3.7.3 Docker Compose (topics-snapshot at [/Users/pypt/Dropbox/etc-MediaCloud/trunk/apps/topics-snapshot/docker-compose.tests.yml])" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Remote Python 3.8.5 Docker Compose (topics-snapshot at [/home/pypt/m/apps/topics-snapshot/docker-compose.tests.yml])" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PackageRequirementsSettings">
diff --git a/apps/topics-snapshot/docker-compose.tests.yml b/apps/topics-snapshot/docker-compose.tests.yml
index d0dbe3db4c..bc0a845028 100644
--- a/apps/topics-snapshot/docker-compose.tests.yml
+++ b/apps/topics-snapshot/docker-compose.tests.yml
@@ -97,8 +97,8 @@ services:
               source: ./../postgresql-server/schema/
               target: /opt/mediacloud/schema/
             - type: bind
-              source: ./../postgresql-server/conf/
-              target: /etc/postgresql/11/main/
+              source: ./../postgresql-base/conf/
+              target: /etc/postgresql/13/main/
 
     solr-shard-01:
         image: gcr.io/mcback/solr-shard:latest
diff --git a/apps/webapp-api/.idea/mediawords.sql b/apps/webapp-api/.idea/mediawords.sql
new file mode 120000
index 0000000000..08fc9a64b9
--- /dev/null
+++ b/apps/webapp-api/.idea/mediawords.sql
@@ -0,0 +1 @@
+../../postgresql-server/schema/mediawords.sql
\ No newline at end of file
diff --git a/apps/webapp-api/.idea/sqlDataSources.xml b/apps/webapp-api/.idea/sqlDataSources.xml
new file mode 100644
index 0000000000..08b89ea572
--- /dev/null
+++ b/apps/webapp-api/.idea/sqlDataSources.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="SqlDataSourceStorage">
+    <option name="dataSources">
+      <list>
+        <State>
+          <option name="id" value="f99cd3d7-1bcb-427e-bfae-b192ae6c8535" />
+          <option name="name" value="DDL data source" />
+          <option name="urls">
+            <array>
+              <option value="file://$PROJECT_DIR$/.idea/mediawords.sql" />
+            </array>
+          </option>
+        </State>
+      </list>
+    </option>
+  </component>
+</project>
\ No newline at end of file
diff --git a/apps/webapp-api/docker-compose.tests.yml b/apps/webapp-api/docker-compose.tests.yml
index 5f2963bc66..af67c24c42 100644
--- a/apps/webapp-api/docker-compose.tests.yml
+++ b/apps/webapp-api/docker-compose.tests.yml
@@ -69,8 +69,8 @@ services:
               source: ./../postgresql-server/schema/
               target: /opt/mediacloud/schema/
             - type: bind
-              source: ./../postgresql-server/conf/
-              target: /etc/postgresql/11/main/
+              source: ./../postgresql-base/conf/
+              target: /etc/postgresql/13/main/
 
     solr-shard-01:
         image: gcr.io/mcback/solr-shard:latest
diff --git a/apps/webapp-api/src/perl/MediaWords/Controller/Api/V2/StoriesBase.pm b/apps/webapp-api/src/perl/MediaWords/Controller/Api/V2/StoriesBase.pm
index 2e1512d9c4..15a6c2df1c 100644
--- a/apps/webapp-api/src/perl/MediaWords/Controller/Api/V2/StoriesBase.pm
+++ b/apps/webapp-api/src/perl/MediaWords/Controller/Api/V2/StoriesBase.pm
@@ -14,6 +14,7 @@ use namespace::autoclean;
 
 use MediaWords::DBI::Stories;
 use MediaWords::DBI::Stories::WordMatrix;
+use MediaWords::DBI::Stories::WordMatrixOldStopwords;   # FIXME remove once stopword comparison is over
 use MediaWords::Solr;
 use MediaWords::Solr::TagCounts;
 use MediaWords::Util::ParseHTML;
@@ -165,16 +166,23 @@ SQL
 }
 
 # add a word_count field to each story that includes a word count for that story
-sub _attach_word_counts_to_stories($$)
+# FIXME remove extra "$" once stopword comparison is over
+sub _attach_word_counts_to_stories($$$)
 {
-    my ( $db, $stories ) = @_;
+    # FIXME remove extra parameter once stopword comparison is over
+    my ( $db, $stories, $old_stopwords ) = @_;
 
     my $stories_ids = [ map { $_->{ stories_id } } @{ $stories } ];
 
     my $stories_lookup = {};
     map { $stories_lookup->{ $_->{ stories_id } } = $_ } @{ $stories };
 
-    my ( $word_matrix, $word_list ) = MediaWords::DBI::Stories::WordMatrix::get_story_word_matrix( $db, $stories_ids );
+    my ( $word_matrix, $word_list );
+    if ( $old_stopwords ) {
+        ( $word_matrix, $word_list ) = MediaWords::DBI::Stories::WordMatrixOldStopwords::get_story_word_matrix( $db, $stories_ids );
+    } else {
+        ( $word_matrix, $word_list ) = MediaWords::DBI::Stories::WordMatrix::get_story_word_matrix( $db, $stories_ids );
+    }
 
     while ( my ( $stories_id, $word_counts ) = each( %{ $word_matrix } ) )
     {
@@ -322,7 +330,9 @@ SQL
         $stories = MediaWords::DBI::Stories::attach_story_data_to_stories( $stories, $feed_data, 'feeds' );
     }
 
-    $stories = _attach_word_counts_to_stories( $db, $stories ) if ( int( $self->{ show_wc } // 0 ) );
+    if ( int( $self->{ show_wc } // 0 ) ) {
+        $stories = _attach_word_counts_to_stories( $db, $stories, $self->{ old_stopwords } );
+    }
 
     return $stories;
 }
@@ -381,6 +391,8 @@ sub _fetch_list($$$$$$)
     $self->{ show_text }          = int( $c->req->params->{ text }          // 0 );
     $self->{ show_ap_stories_id } = int( $c->req->params->{ ap_stories_id } // 0 );
     $self->{ show_wc }            = int( $c->req->params->{ wc }            // 0 );
+    # FIXME remove once stopword comparison is over
+    $self->{ old_stopwords }      = int( $c->req->params->{ old_stopwords } // 0 );
     $self->{ show_feeds }         = int( $c->req->params->{ show_feeds }    // 0 );
 
     $rows //= 20;
@@ -544,7 +556,13 @@ sub word_matrix_GET
     my $stories_ids =
       MediaWords::Solr::search_solr_for_stories_ids( $db, { q => $q, fq => $fq, rows => $rows, sort => 'random_1 asc' } );
 
-    my ( $word_matrix, $word_list ) = MediaWords::DBI::Stories::WordMatrix::get_story_word_matrix( $db, $stories_ids );
+    my ( $word_matrix, $word_list );
+    if ( $c->req->params->{ old_stopwords } ) {
+        # FIXME remove once stopword comparison is over
+        ( $word_matrix, $word_list ) = MediaWords::DBI::Stories::WordMatrixOldStopwords::get_story_word_matrix( $db, $stories_ids );
+    } else {
+        ( $word_matrix, $word_list ) = MediaWords::DBI::Stories::WordMatrix::get_story_word_matrix( $db, $stories_ids );
+    }
 
     $self->status_ok( $c, entity => { word_matrix => $word_matrix, word_list => $word_list } );
 
diff --git a/apps/webapp-api/src/perl/MediaWords/Controller/Api/V2/Wc.pm b/apps/webapp-api/src/perl/MediaWords/Controller/Api/V2/Wc.pm
index c3c39f024f..d1c560185d 100644
--- a/apps/webapp-api/src/perl/MediaWords/Controller/Api/V2/Wc.pm
+++ b/apps/webapp-api/src/perl/MediaWords/Controller/Api/V2/Wc.pm
@@ -9,6 +9,9 @@ use List::Util qw(first max maxstr min minstr reduce shuffle sum);
 use Moose;
 use namespace::autoclean;
 use MediaWords::Solr;
+use MediaWords::Solr::WordCounts;
+use MediaWords::Solr::WordCountsOldStopwords;   # FIXME remove once stopword comparison is over
+
 
 =head1 NAME
 
@@ -47,7 +50,13 @@ sub list_GET : PathPrefix( '/api' )
 
     $c->req->params->{ sample_size } = $sample_size;
 
-    my $wc = MediaWords::Solr::WordCounts->new( { db => $c->dbis, cgi_params => $c->req->params } );
+    my $wc;
+    if ( $c->req->params->{ old_stopwords } ) {
+        # FIXME remove once stopword comparison is over
+        $wc = MediaWords::Solr::WordCountsOldStopwords->new( { db => $c->dbis, cgi_params => $c->req->params } );
+    } else {
+        $wc = MediaWords::Solr::WordCounts->new( { db => $c->dbis, cgi_params => $c->req->params } );
+    }
 
     my $words = $wc->get_words;
 
diff --git a/apps/webapp-api/src/perl/MediaWords/DBI/Stories/WordMatrixOldStopwords.pm b/apps/webapp-api/src/perl/MediaWords/DBI/Stories/WordMatrixOldStopwords.pm
new file mode 100644
index 0000000000..b664d8116d
--- /dev/null
+++ b/apps/webapp-api/src/perl/MediaWords/DBI/Stories/WordMatrixOldStopwords.pm
@@ -0,0 +1,152 @@
+# FIXME remove once stopword comparison is over
+package MediaWords::DBI::Stories::WordMatrixOldStopwords;
+
+use strict;
+use warnings;
+
+use Modern::Perl "2015";
+use MediaWords::CommonLibs;
+
+use List::MoreUtils qw(natatime);
+
+use MediaWords::Solr::WordCountsOldStopwords;
+
+# Return an arrayref of row hashes (stories_id, language, story_text), one per story and language, for the given
+# $stories_ids.  story_text is that story's story_sentences joined with $sentence_separator.  Empty input returns [].
+sub _get_story_word_matrix_rows($$$)
+{
+    my ( $db, $stories_ids, $sentence_separator ) = @_;
+
+    return [] unless ( @{ $stories_ids } );
+
+    # every id is forced through int() because the list is interpolated directly into the SQL text below
+    my $stories_ids_list = join( ',', map { int( $_ ) } @{ $stories_ids } );
+
+    my $rows = $db->query( <<SQL, $sentence_separator )->hashes;
+select stories_id, language, string_agg( sentence, \$1 ) story_text
+    from story_sentences
+    where stories_id in ( $stories_ids_list )
+    group by stories_id, language
+    order by stories_id, language
+SQL
+
+    return $rows;
+}
+
+# Given a list of stories_ids, generate a matrix consisting of the vector of word stem counts for each stories_id on each
+# line.  Return a hash of story word counts and a list of word stems.
+#
+# The hash of story word counts is in the following format:
+# {
+#     <stories_id> =>
+#         { <word_id_1> => <count>,
+#           <word_id_2> => <count>
+#         },
+#     ...
+# }
+#
+# The id of each word is the index of the given word in the word list.  The word list is a list of lists, with each
+# member list consisting of the stem followed by the most commonly used term.
+#
+# For example, for stories_ids 1 and 2, both of which contain 4 mentions of 'foo' and 10 of 'bars', the word count
+# hash and word list look like:
+#
+# { 1 => { 0 => 4, 1 => 10 }, 2 => { 0 => 4, 1 => 10 } }
+#
+# [ [ 'foo', 'foo' ], [ 'bar', 'bars' ] ]
+#
+# The story_sentences for each story will be used for word counting. If $max_words is specified, only the most common
+# $max_words will be used for each story.
+# Stories are processed in chunks of 100 ids; a transaction is begun (and committed) here only if none is active.
+#
+# Each sentence is handed to the word counter together with the language of its story_sentences rows.  If that language
+# is not supported, stemming and stopwording are expected to become null operations (TODO confirm).  For the list of
+# languages supported, see @MediaWords::Languages::Language::_supported_languages.
+sub get_story_word_matrix($$;$)
+{
+    my ( $db, $stories_ids, $max_words ) = @_;
+
+    my $word_index_lookup   = {};
+    my $word_index_sequence = 0;
+    my $word_term_counts    = {};
+
+    my $use_transaction = !$db->in_transaction();
+    $db->begin if ( $use_transaction );
+
+    my $sentence_separator = 'SPLITSPLIT';
+
+    # stories_id => { word index => count }
+    my $word_matrix = {};
+    my $iter = natatime( 100, @{ $stories_ids } );
+    while ( my @chunk_stories_ids = $iter->() )
+    {
+        my $stories = _get_story_word_matrix_rows( $db, \@chunk_stories_ids, $sentence_separator );
+
+        for my $story ( @{ $stories } )
+        {
+            my $wc = MediaWords::Solr::WordCountsOldStopwords->new();
+
+            # Remove stopwords from the stems
+            $wc->include_stopwords( 0 );
+
+            my $sentences_and_story_languages = [];
+            for my $sentence ( split( $sentence_separator, $story->{ story_text } ) )
+            {
+                push(
+                    @{ $sentences_and_story_languages },
+                    {
+                        'story_language' => $story->{ language },
+                        'sentence'       => $sentence,
+                    }
+                );
+            }
+
+            my $stem_counts = $wc->count_stems( $sentences_and_story_languages );
+
+            my $stem_count_list = [];
+            while ( my ( $stem, $data ) = each( %{ $stem_counts } ) )
+            {
+                push( @{ $stem_count_list }, [ $stem, $data->{ count }, $data->{ terms } ] );
+            }
+            # keep only the $max_words most frequent stems: sort by count descending, then drop the tail
+            if ( $max_words )
+            {
+                $stem_count_list = [ sort { $b->[ 1 ] <=> $a->[ 1 ] } @{ $stem_count_list } ];
+                splice( @{ $stem_count_list }, $max_words ) if ( scalar( @{ $stem_count_list } ) > $max_words );
+            }
+
+            $word_matrix->{ $story->{ stories_id } } //= {};
+            my $stem_vector = $word_matrix->{ $story->{ stories_id } };
+            for my $stem_count ( @{ $stem_count_list } )
+            {
+                my ( $stem, $count, $terms ) = @{ $stem_count };
+
+                $word_index_lookup->{ $stem } //= $word_index_sequence++;
+                my $index = $word_index_lookup->{ $stem };
+
+                $stem_vector->{ $index } += $count;
+
+                $word_term_counts->{ $stem }->{ $_ } += $terms->{ $_ } for ( keys( %{ $terms } ) );
+            }
+        }
+    }
+
+    $db->commit if ( $use_transaction );
+
+    my $word_list = [];
+    for my $stem ( keys( %{ $word_index_lookup } ) )
+    {
+        my $term_pairs = [];
+        while ( my ( $term, $count ) = each( %{ $word_term_counts->{ $stem } } ) )
+        {
+            push( @{ $term_pairs }, [ $term, $count ] );
+        }
+
+        $term_pairs = [ sort { $b->[ 1 ] <=> $a->[ 1 ] } @{ $term_pairs } ];
+        $word_list->[ $word_index_lookup->{ $stem } ] = [ $stem, $term_pairs->[ 0 ]->[ 0 ] ];
+    }
+
+    return ( $word_matrix, $word_list );
+}
+
+1;
diff --git a/apps/webapp-api/src/perl/MediaWords/Solr/WordCountsOldStopwords.pm b/apps/webapp-api/src/perl/MediaWords/Solr/WordCountsOldStopwords.pm
new file mode 100644
index 0000000000..8af5a061e5
--- /dev/null
+++ b/apps/webapp-api/src/perl/MediaWords/Solr/WordCountsOldStopwords.pm
@@ -0,0 +1,447 @@
+# FIXME remove once stopword comparison is over
+package MediaWords::Solr::WordCountsOldStopwords;
+
+use Moose;
+
+=head1 NAME
+
+MediaWords::Solr::WordCountsOldStopwords - handle word counting from solr using the old stopword lists
+
+=head1 DESCRIPTION
+
+Uses sampling to generate quick word counts from solr queries.
+
+=cut
+
+use strict;
+use warnings;
+use utf8;
+
+use Modern::Perl "2015";
+use MediaWords::CommonLibs;
+
+use CHI;
+use Data::Dumper;
+use Encode;
+use List::Util;
+use Readonly;
+use URI::Escape;
+
+use MediaWords::Languages::Language;
+use MediaWords::Solr;
+use MediaWords::Solr::Query::MatchingSentences;
+use MediaWords::Util::ParseJSON;
+use MediaWords::Util::Text;
+
+# Max. length of the sentence to tokenize
+Readonly my $MAX_SENTENCE_LENGTH => 1024;
+
+# Max. number of times to count a word in a single sentence
+Readonly my $MAX_REPEATS_PER_SENTENCE => 3;
+
+# Version prefix of the cache keys; it is (re)assigned in _get_cache_key()
+my $_wc_cache_version;
+
+# Moose instance fields
+
+has 'q'                         => ( is => 'rw', isa => 'Str' );    # sent to Solr as the 'q' parameter
+has 'fq'                        => ( is => 'rw', isa => 'ArrayRef' );    # sent to Solr as 'fq'; BUILDARGS makes it a list
+has 'num_words'                 => ( is => 'rw', isa => 'Int', default => 500 );    # max. number of words to return
+has 'sample_size'               => ( is => 'rw', isa => 'Int', default => 1000 );    # sent to Solr as 'rows'
+has 'random_seed'               => ( is => 'rw', isa => 'Int', default => 1 );    # used in the 'random_<seed> asc' Solr sort
+has 'ngram_size'                => ( is => 'rw', isa => 'Int', default => 1 );    # words per counted phrase in count_stems()
+has 'include_stopwords'         => ( is => 'rw', isa => 'Bool' );    # if true, count_stems() does not filter stopwords
+has 'include_stats'             => ( is => 'rw', isa => 'Bool' );    # if true, results are wrapped together with stats
+has 'cached_combined_stopwords' => ( is => 'rw', isa => 'HashRef' );    # memo used by _combine_stopwords()
+has 'db' => ( is => 'rw' );    # handle passed on to query_matching_sentences()
+
+# Names of the attributes that can be set from / exported as cgi params
+sub __get_cgi_param_attributes()
+{
+    return [ 'q', 'fq', 'num_words', 'sample_size', 'random_seed', 'include_stopwords', 'include_stats', 'ngram_size' ];
+}
+
+# Return a hashref mapping the name of every cgi param attribute to the value
+# it currently has on this object (values are read through the Moose metaclass)
+sub _get_cgi_param_hash($)
+{
+    my ( $self ) = @_;
+
+    my $metaclass = $self->meta;
+    my %params;
+    for my $name ( @{ __get_cgi_param_attributes() } )
+    {
+        $params{ $name } = $metaclass->get_attribute( $name )->get_value( $self );
+    }
+    return \%params;
+}
+
+# Pre-process constructor arguments before Moose sees them. Besides the usual
+# forms (a single hashref, or a key => value list) this supports:
+#
+#   MediaWords::Solr::WordCountsOldStopwords->new( cgi_params => $cgi_params, db => $db )
+#
+# where $cgi_params is a hash of cgi params directly from a web request. In
+# that form only the attributes listed by __get_cgi_param_attributes(), plus
+# 'db', are passed on; every other key is dropped.
+#
+# In all forms 'fq' ends up as an arrayref: a plain scalar is wrapped into a
+# one-element list and a missing / false value becomes an empty list. Without
+# cgi_params, a hashref passed in by the caller is itself updated that way.
+around BUILDARGS => sub {
+    my ( $orig, $class, @params ) = @_;
+
+    # A reference is used as is (not copied); any other defined first argument
+    # starts a key => value list; an undefined one means an empty hash
+    my $first = $params[ 0 ];
+    my $args =
+        ref( $first )     ? $first
+      : defined( $first ) ? { @params }
+      :                     {};
+
+    # By default the arguments are passed through untouched
+    my $vals = $args;
+    if ( $args->{ cgi_params } )
+    {
+        my $cgi_params = $args->{ cgi_params };
+
+        # Whitelist: copy only the parameters that were actually submitted
+        $vals = {};
+        for my $name ( @{ __get_cgi_param_attributes() } )
+        {
+            next unless ( exists( $cgi_params->{ $name } ) );
+
+            $vals->{ $name } = $cgi_params->{ $name };
+        }
+
+        # The database handle is not a cgi param, so carry it over separately
+        $vals->{ db } = $args->{ db } if ( $args->{ db } );
+    }
+
+    # Normalize 'fq' so that it can always be treated as a list
+    my $fq = $vals->{ fq };
+    if ( !$fq )
+    {
+        $vals->{ fq } = [];
+    }
+    elsif ( !ref( $fq ) )
+    {
+        $vals->{ fq } = [ $fq ];
+    }
+
+    return $class->$orig( $vals );
+};
+
+# Return a hashref with the merged "old" stopword maps of the given languages
+# ($languages is an arrayref of language objects; repeated language codes are
+# only used once). Dies if $languages is not a non-empty arrayref.
+#
+# Merged maps are memoized in the 'cached_combined_stopwords' attribute under
+# a key made of the sorted language codes, so that every combination of
+# languages is merged only once per object.
+sub _combine_stopwords($$)
+{
+    my ( $self, $languages ) = @_;
+
+    die "Languages is not an arrayref." unless ( ref( $languages ) eq 'ARRAY' );
+    die "Languages should have at least one language set." unless ( scalar( @{ $languages } ) > 0 );
+
+    # Keep only the first language object seen for every language code
+    my %seen_codes;
+    my @unique_languages;
+    for my $language ( @{ $languages } )
+    {
+        my $code = $language->language_code();
+        next if ( $seen_codes{ $code } );
+
+        $seen_codes{ $code } = 1;
+        push( @unique_languages, $language );
+    }
+
+    # Sorting makes the key independent of the order of the languages
+    my @codes = map { $_->language_code() } @unique_languages;
+    my $cache_key = join( '-', sort( @codes ) );
+
+    # Lazily create the per-object cache
+    unless ( $self->cached_combined_stopwords() )
+    {
+        $self->cached_combined_stopwords( {} );
+    }
+
+    my $cache = $self->cached_combined_stopwords();
+
+    # Merge the stopword maps on a cache miss only
+    unless ( defined( $cache->{ $cache_key } ) )
+    {
+        my %merged;
+        for my $language ( @unique_languages )
+        {
+            # Later languages win if several maps have the same word
+            my $stopwords = $language->stop_words_old_map();
+
+            @merged{ keys( %{ $stopwords } ) } = values( %{ $stopwords } );
+        }
+
+        $cache->{ $cache_key } = \%merged;
+    }
+
+    return $cache->{ $cache_key };
+}
+
+# Return true if $word should be counted, i.e. it neither consists of digits
+# only nor is present in the $stopwords lookup hashref. (Kept at package
+# level: a named sub is installed into the package no matter where it is
+# declared.)
+sub _word_is_valid_token($$)
+{
+    my ( $word, $stopwords ) = @_;
+
+    # Remove numbers
+    return 0 if ( $word =~ /^\d+$/ );
+
+    # Remove stopwords
+    return 0 if ( $stopwords->{ $word } );
+
+    return 1;
+}
+
+# Count the stems and terms in a list of sentences.
+#
+# $story_sentences is an arrayref of hashrefs with these keys:
+#
+# * 'sentence' -- text of the sentence;
+# * 'language' -- language code of the sentence (defaults to 'en');
+# * 'story_language' -- language code of the story (defaults to 'en').
+#
+# Undefined entries, entries without a 'sentence' and repeated sentences are
+# skipped. Returns a hashref in the following format:
+#
+# { $stem => { count => $stem_count, terms => { $term => $term_count, ... } } }
+#
+# If ngram_size is > 1, the unstemmed phrases of ngram_size words are used as
+# the stems.
+sub count_stems($$)
+{
+    my ( $self, $story_sentences ) = @_;
+
+    # Loop invariants
+    my $n       = $self->ngram_size;
+    my $lang_en = MediaWords::Languages::Language::default_language();
+
+    # Sentences that were counted already; repeated sentences are skipped
+    my $dup_sentences = {};
+
+    my $stem_counts = {};
+    for my $story_sentence ( @{ $story_sentences } )
+    {
+        next unless ( defined( $story_sentence ) );
+
+        my $sentence = $story_sentence->{ 'sentence' };
+        next unless ( defined( $sentence ) );
+
+        next if ( $dup_sentences->{ $sentence } );
+        $dup_sentences->{ $sentence } = 1;
+
+        # Very long sentences tend to be noise -- html text and the like.
+        $sentence = substr( $sentence, 0, $MAX_SENTENCE_LENGTH ) if ( length( $sentence ) > $MAX_SENTENCE_LENGTH );
+
+        # Remove urls so they don't get tokenized into noise
+        $sentence =~ s~https?://\S+~~gi;
+
+        my $story_language    = $story_sentence->{ 'story_language' } || 'en';
+        my $sentence_language = $story_sentence->{ language }         || 'en';
+
+        # Language objects are cached in ::Languages::Language, no need to have a separate cache
+        my $lang_story    = MediaWords::Languages::Language::language_for_code( $story_language ) || $lang_en;
+        my $lang_sentence = MediaWords::Languages::Language::language_for_code( $sentence_language ) || $lang_en;
+
+        # Tokenize into words
+        my $sentence_words = $lang_sentence->split_sentence_to_words( $sentence );
+
+        # Remove numbers and stopwords;
+        # (don't stem stopwords first as they will usually be stemmed too much)
+        my $combined_stopwords = {};
+        unless ( $self->include_stopwords )
+        {
+            # Use English, story's language and sentence's language stopwords
+            $combined_stopwords = $self->_combine_stopwords( [ $lang_en, $lang_story, $lang_sentence ] );
+        }
+
+        $sentence_words = [ grep { _word_is_valid_token( $_, $combined_stopwords ) } @{ $sentence_words } ];
+
+        # Stem using sentence language's algorithm (n-grams are left unstemmed)
+        my $sentence_word_stems =
+          ( $n > 1 ) ? $sentence_words : $lang_sentence->stem_words( $sentence_words );
+
+        # Number of $n-word windows in the sentence (<= 0 if it is too short)
+        my $num_ngrams = scalar( @{ $sentence_words } ) - $n + 1;
+
+        my $sentence_stem_counts = {};
+
+        for ( my $i = 0 ; $i < $num_ngrams ; ++$i )
+        {
+            my $term = join( ' ', @{ $sentence_words }[ $i ..      ( $i + $n - 1 ) ] );
+            my $stem = join( ' ', @{ $sentence_word_stems }[ $i .. ( $i + $n - 1 ) ] );
+
+            # Count every stem at most $MAX_REPEATS_PER_SENTENCE times per sentence
+            next if ( ++$sentence_stem_counts->{ $stem } > $MAX_REPEATS_PER_SENTENCE );
+
+            # Count the stem itself and the term that produced it
+            ++$stem_counts->{ $stem }->{ count };
+            ++$stem_counts->{ $stem }->{ terms }->{ $term };
+        }
+    }
+
+    return $stem_counts;
+}
+
+# Run the query against Solr, count the words in a random sample of matching
+# sentences and return the most common ones.
+#
+# Returns an arrayref of at most num_words hashrefs sorted by descending count:
+#
+#   { stem => $stem, count => $count, term => $most_common_term_for_stem }
+#
+# or, if include_stats is set, a hashref { stats => { ... }, words => [ ... ] }.
+# If neither 'q' nor 'fq' is set, returns an empty arrayref without querying
+# Solr (regardless of include_stats).
+sub _get_words_from_solr_server($)
+{
+    my ( $self ) = @_;
+
+    unless ( $self->q() || ( $self->fq && @{ $self->fq } ) )
+    {
+        return [];
+    }
+
+    my $solr_params = {
+        q    => $self->q(),
+        fq   => $self->fq,
+        rows => $self->sample_size,
+        sort => 'random_' . $self->random_seed . ' asc'
+    };
+
+    DEBUG( "executing solr query ..." );
+    DEBUG Dumper( $solr_params );
+
+    my $story_sentences = MediaWords::Solr::Query::MatchingSentences::query_matching_sentences( $self->db, $solr_params, $self->sample_size );
+
+    DEBUG( "counting sentences..." );
+    my $words = $self->count_stems( $story_sentences );
+    DEBUG( "done counting sentences" );
+
+    # Most frequent stems first; ties are broken by stem to keep the order stable
+    my @stems = sort {
+        $words->{ $b }->{ count } <=> $words->{ $a }->{ count } or    #
+          $b cmp $a                                                   #
+    } keys( %{ $words } );
+
+    my $counts = [];
+    for my $stem ( @stems )
+    {
+        my $terms = $words->{ $stem }->{ terms };
+
+        # Represent the stem by its most common term; equally common terms are
+        # ordered alphabetically so that the choice does not depend on the
+        # hash iteration order
+        my ( $max_term ) = sort { $terms->{ $b } <=> $terms->{ $a } or $a cmp $b } keys( %{ $terms } );
+
+        if ( !MediaWords::Util::Text::is_valid_utf8( $stem ) || !MediaWords::Util::Text::is_valid_utf8( $max_term ) )
+        {
+            WARN "invalid utf8: $stem / $max_term";
+            next;
+        }
+
+        push( @{ $counts }, { stem => $stem, count => $words->{ $stem }->{ count }, term => $max_term } );
+    }
+
+    # Keep the top num_words only; splice() warns if the offset is past the end
+    # of the array, so only truncate lists that are actually too long
+    splice( @{ $counts }, $self->num_words ) if ( scalar( @{ $counts } ) > $self->num_words );
+
+    if ( $self->include_stats )
+    {
+        return {
+            stats => {
+                num_words_returned     => scalar( @{ $counts } ),
+                num_sentences_returned => scalar( @{ $story_sentences } ),
+                num_words_param        => $self->num_words,
+                sample_size_param      => $self->sample_size,
+                random_seed            => $self->random_seed
+            },
+            words => $counts
+        };
+    }
+    else
+    {
+        return $counts;
+    }
+}
+
+# Return the CHI cache that word counts are stored in: a file-based cache
+# under /var/cache/word_counts whose entries are given an expiration time of
+# one day with an expiration variance of 0.1.
+# A new cache object is created on every call.
+sub _get_cache
+{
+    my @storage    = ( driver => 'File', root_dir => "/var/cache/word_counts", depth => 4 );
+    my @expiration = ( expires_in => '1 day', expires_variance => '0.1' );
+
+    return CHI->new( @storage, @expiration );
+}
+
+# Return the cache key that identifies the query: package name, cache version
+# and a dump of the values of all cgi param attributes. The package name keeps
+# the keys apart from those of MediaWords::Solr::WordCounts, which this class
+# appears to be a copy of -- NOTE(review): presumably both share the cache dir.
+sub _get_cache_key
+{
+    my ( $self ) = @_;
+
+    $_wc_cache_version = '1';
+
+    my $meta   = $self->meta;
+    my @values = map { $meta->get_attribute( $_ )->get_value( $self ) } @{ __get_cgi_param_attributes() };
+
+    return __PACKAGE__ . ":$_wc_cache_version:" . Dumper( @values );
+}
+
+# Return whatever the cache holds under the key of this query
+sub _get_cached_words
+{
+    my ( $self ) = @_;
+    my $cache_key = $self->_get_cache_key;
+    return $self->_get_cache->get( $cache_key );
+}
+
+# Store $value in the cache under the key of this query
+sub _set_cached_words
+{
+    my ( $self, $value ) = @_;
+    my $cache_key = $self->_get_cache_key;
+    return $self->_get_cache->set( $cache_key, $value );
+}
+
+# Return the most common words in the sentences matching the Solr query, as
+# produced by _get_words_from_solr_server().
+#
+# A true value found in the cache for this query is returned as is; otherwise
+# the words are counted from scratch and the result is written to the cache.
+sub get_words
+{
+    my ( $self ) = @_;
+
+    # Try the cache first
+    my $cached_words = $self->_get_cached_words;
+    return $cached_words if ( $cached_words );
+
+    # Nothing usable in the cache: run the query and count the words
+    my $fresh_words = $self->_get_words_from_solr_server();
+
+    # Cache the result for later calls made with the same parameters
+    $self->_set_cached_words( $fresh_words );
+
+    return $fresh_words;
+}
+
+1;
diff --git a/apps/webapp-httpd/docker-compose.tests.yml b/apps/webapp-httpd/docker-compose.tests.yml
index 4569acad28..a422bcb5b7 100644
--- a/apps/webapp-httpd/docker-compose.tests.yml
+++ b/apps/webapp-httpd/docker-compose.tests.yml
@@ -98,8 +98,8 @@ services:
               source: ./../postgresql-server/schema/
               target: /opt/mediacloud/schema/
             - type: bind
-              source: ./../postgresql-server/conf/
-              target: /etc/postgresql/11/main/
+              source: ./../postgresql-base/conf/
+              target: /etc/postgresql/13/main/
 
     solr-shard-01:
         image: gcr.io/mcback/solr-shard:latest
diff --git a/apps/word2vec-generate-snapshot-model/.idea/mediawords.sql b/apps/word2vec-generate-snapshot-model/.idea/mediawords.sql
new file mode 120000
index 0000000000..08fc9a64b9
--- /dev/null
+++ b/apps/word2vec-generate-snapshot-model/.idea/mediawords.sql
@@ -0,0 +1 @@
+../../postgresql-server/schema/mediawords.sql
\ No newline at end of file
diff --git a/apps/word2vec-generate-snapshot-model/.idea/sqlDataSources.xml b/apps/word2vec-generate-snapshot-model/.idea/sqlDataSources.xml
new file mode 100644
index 0000000000..7a90f0188b
--- /dev/null
+++ b/apps/word2vec-generate-snapshot-model/.idea/sqlDataSources.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="SqlDataSourceStorage">
+    <option name="dataSources">
+      <list>
+        <State>
+          <option name="id" value="e954f105-7622-45c9-92b0-a2619a84e67d" />
+          <option name="name" value="DDL data source" />
+          <option name="urls">
+            <array>
+              <option value="file://$PROJECT_DIR$/.idea/mediawords.sql" />
+            </array>
+          </option>
+        </State>
+      </list>
+    </option>
+  </component>
+</project>
\ No newline at end of file
diff --git a/apps/word2vec-generate-snapshot-model/docker-compose.tests.yml b/apps/word2vec-generate-snapshot-model/docker-compose.tests.yml
index 294d99757a..fdc9cf0a10 100644
--- a/apps/word2vec-generate-snapshot-model/docker-compose.tests.yml
+++ b/apps/word2vec-generate-snapshot-model/docker-compose.tests.yml
@@ -49,5 +49,5 @@ services:
               source: ./../postgresql-server/schema/
               target: /opt/mediacloud/schema/
             - type: bind
-              source: ./../postgresql-server/conf/
-              target: /etc/postgresql/11/main/
+              source: ./../postgresql-base/conf/
+              target: /etc/postgresql/13/main/
diff --git a/doc/docker_cheat_sheet.markdown b/doc/docker_cheat_sheet.markdown
index 2338c1ea67..cb2c21b191 100644
--- a/doc/docker_cheat_sheet.markdown
+++ b/doc/docker_cheat_sheet.markdown
@@ -91,7 +91,7 @@ To access PostgreSQL directly, you can either run `psql` in a `postgresql-server
 
      ```bash
      $ docker exec -it 29a psql
-     psql (11.3 (Ubuntu 11.3-1.pgdg20.04+1))
+     psql (13.3 (Ubuntu 13.3-1.pgdg20.04+1))
      Type "help" for help.
      
      mediacloud=# 
@@ -147,7 +147,7 @@ To access PostgreSQL directly, you can either run `psql` in a `postgresql-server
      Password for user mediacloud: 
      Timing is on.
      Expanded display is on.
-     psql (11.3)
+     psql (13.3)
      Type "help" for help.
      
      mediacloud=# 
diff --git a/doc/podcasts_gc_auth.markdown b/doc/podcasts_gc_auth.markdown
index 88e98fbcc7..a43366039c 100644
--- a/doc/podcasts_gc_auth.markdown
+++ b/doc/podcasts_gc_auth.markdown
@@ -60,13 +60,20 @@ In order to transcribe podcast episodes using Google Cloud's Speech API, you'll
     gcloud services enable speech.googleapis.com
     ```
 
-9. Create a Cloud Storage bucket to store episode audio files (if one doesn't exist already):
+9. Create three Cloud Storage buckets:
 
     ```shell
-    gsutil mb gs://mc-podcast-episodes-audio-files-test
+    # Raw (non-transcoded) enclosures fetched from podcast websites
+    gsutil mb gs://mc-podcast-raw-enclosures-test
+
+    # Transcoded episodes ready for submission to the Speech API
+    gsutil mb gs://mc-podcast-transcoded-episodes-test
+
+    # Transcript JSON files
+    gsutil mb gs://mc-podcast-transcripts-test
     ```
 
-10. Create a service account that the podcast transcribing apps would use:
+10. Create a service account that the podcast transcribing workflow will use:
 
     ```shell
     gcloud iam service-accounts create mc-transcribe-podcasts-test \
@@ -74,12 +81,18 @@ In order to transcribe podcast episodes using Google Cloud's Speech API, you'll
         --description="(test) Upload episodes to GCS, submit them to Speech API, fetch transcripts"
     ```
 
-11. Allow the service account to read / write objects from bucket (here `mc-upload-episode-audio-files` is the service account name, and `mc-podcast-transcription-test` is the Google Cloud project ID):
+11. Allow the service account to read / write objects from the buckets (here `mc-transcribe-podcasts-test` is the service account name, and `meag-podcast-transcription-tst` is the Google Cloud project ID):
 
     ```shell
     gsutil acl ch \
-        -u mc-transcribe-podcasts-test@mc-podcast-transcription-test.iam.gserviceaccount.com:O \
-        gs://mc-podcast-episodes-audio-files-test
+        -u mc-transcribe-podcasts-test@meag-podcast-transcription-tst.iam.gserviceaccount.com:O \
+        gs://mc-podcast-raw-enclosures-test
+    gsutil acl ch \
+        -u mc-transcribe-podcasts-test@meag-podcast-transcription-tst.iam.gserviceaccount.com:O \
+        gs://mc-podcast-transcoded-episodes-test
+    gsutil acl ch \
+        -u mc-transcribe-podcasts-test@meag-podcast-transcription-tst.iam.gserviceaccount.com:O \
+        gs://mc-podcast-transcripts-test
     ```
 
 12. Generate authentication JSON credentials:
@@ -87,7 +100,7 @@ In order to transcribe podcast episodes using Google Cloud's Speech API, you'll
     ```shell
     gcloud iam service-accounts keys create \
         mc-transcribe-podcasts-test.json \
-        --iam-account mc-transcribe-podcasts-test@mc-podcast-transcription-test.iam.gserviceaccount.com
+        --iam-account mc-transcribe-podcasts-test@meag-podcast-transcription-tst.iam.gserviceaccount.com
     ```
 
 13. Encode contents of `mc-transcribe-podcasts-test.json` to Base64:
@@ -96,4 +109,4 @@ In order to transcribe podcast episodes using Google Cloud's Speech API, you'll
     base64 mc-transcribe-podcasts-test.json
     ```
 
-13. Copy the resulting Base64-encoded string to `MC_PODCAST_GC_AUTH_JSON_BASE64` environment variable that's set for apps using Google Cloud services for podcast transcription.
+14. Copy the resulting Base64-encoded string to the `MC_PODCAST_AUTH_JSON_BASE64` environment variable that's set for apps using Google Cloud services for podcast transcription.
diff --git a/doc/postgresql_upgrade.markdown b/doc/postgresql_upgrade.markdown
new file mode 100644
index 0000000000..24ea81db47
--- /dev/null
+++ b/doc/postgresql_upgrade.markdown
@@ -0,0 +1,163 @@
+# PostgreSQL upgrade
+
+To upgrade PostgreSQL between two (e.g. 12 -> 13) or more (e.g. 11 -> 12 -> 13) versions, do the following:
+
+
+## Preparation (up to a week before)
+
+1. Sync the initial PostgreSQL dataset to a backup server:
+
+    ```bash
+    production$ sudo zfs snapshot space/mediacloud/vol_postgresql_data@11_initial
+
+    production$ sudo zfs send space/mediacloud/vol_postgresql_data@11_initial | \
+        mbuffer -s 128k -m 10M | \
+        pv | \
+        ssh backup sudo zfs receive -F space/mediacloud/vol_postgresql_data
+    ```
+
+2. Update `apps/postgresql-upgrade/Dockerfile` for it to install the version that you're upgrading *from* and the in-between versions if needed, and then build + push the image.
+
+    You should end up with an image that includes all PostgreSQL versions that are needed for upgrading, e.g. if you're upgrading from 11 to 13, `postgresql-upgrade` should include PostgreSQL versions 11, 12 and 13:
+
+    ```dockerfile
+    # Parent image already installs PostgreSQL 13
+    FROM gcr.io/mcback/postgresql-server:latest
+
+    # <...>
+
+    RUN \
+        #
+        # Install PostgreSQL 11 (oldest version)
+        apt-get -y --no-install-recommends install \
+            postgresql-11 \
+            postgresql-client-11 \
+            postgresql-contrib-11 \
+            postgresql-plperl-11 \
+        && \
+        #
+        # Install PostgreSQL 12 (intermediate version)
+        apt-get -y --no-install-recommends install \
+            postgresql-12 \
+            postgresql-client-12 \
+            postgresql-contrib-12 \
+            postgresql-plperl-12 \
+        && \
+        #
+        true
+    ```
+
+3. Run a test upgrade on a backup server to find out if it works and how long it will take:
+
+    ```bash
+    backup$ time docker run -it \
+        --shm-size=64g \
+        -v /space/mediacloud/vol_postgresql_data:/var/lib/postgresql/ \
+        gcr.io/mcback/postgresql-upgrade \
+        postgresql_upgrade.py \
+            --source_version=11 \
+            --target_version=13 \
+        &> test_postgresql_upgrade.log
+
+    backup$ sudo zfs rollback space/mediacloud/vol_postgresql_data@11_initial
+    ```
+
+    If it doesn't work, fix the issues on the production server and `zfs send -i old_snapshot new_snapshot` the changes. Rinse and repeat until it works.
+
+    Take note of how long the upgrade script takes to run.
+
+
+## Pre-upgrade (a day before)
+
+4. A day or so before the upgrade, create a new dataset snapshot and sync it to the backup server.
+
+    This is done to reduce the time it will require to sync the final snapshot after the database is down for the upgrade.
+
+    ```bash
+    production$ sudo zfs snapshot space/mediacloud/vol_postgresql_data@11_intermediate
+
+    production$ sudo zfs send -i \
+            space/mediacloud/vol_postgresql_data@11_initial \
+            space/mediacloud/vol_postgresql_data@11_intermediate \
+        | \
+        mbuffer -s 128k -m 10M | \
+        pv | \
+        ssh backup sudo zfs receive -F space/mediacloud/vol_postgresql_data
+    ```
+
+
+## Upgrade
+
+5. Stop all services:
+
+    ```bash
+    docker stack rm mediacloud
+    ```
+
+    Make sure `postgresql-server` has stopped. If it hasn't, wait for it to stop.
+
+6. Make a final PostgreSQL dataset snapshot and sync it to the backup server:
+
+    ```bash
+    production$ sudo zfs snapshot space/mediacloud/vol_postgresql_data@11_final
+
+    production$ sudo zfs send -i \
+            space/mediacloud/vol_postgresql_data@11_intermediate \
+            space/mediacloud/vol_postgresql_data@11_final \
+        | \
+        mbuffer -s 128k -m 10M | \
+        pv | \
+        ssh backup sudo zfs receive -F space/mediacloud/vol_postgresql_data
+    ```
+
+7. Run the upgrade script:
+
+    ```bash
+    production$ time docker run -it \
+        --shm-size=64g \
+        -v /space/mediacloud/vol_postgresql_data:/var/lib/postgresql/ \
+        gcr.io/mcback/postgresql-upgrade \
+        postgresql_upgrade.py \
+            --source_version=11 \
+            --target_version=13 \
+        &> postgresql_upgrade.log
+    ```
+
+8. Create a post-upgrade snapshot:
+
+    ```bash
+    production$ sudo zfs snapshot space/mediacloud/vol_postgresql_data@13_initial
+    ```
+
+9. Restart all services:
+
+    ```bash
+    docker stack deploy -c docker-compose.mediacloud.yml mediacloud
+    ```
+
+
+## Cleanup
+
+10. Copy post-upgrade snapshot to the backup server:
+
+    ```bash
+    production$ sudo zfs send -i \
+            space/mediacloud/vol_postgresql_data@11_final \
+            space/mediacloud/vol_postgresql_data@13_initial \
+        | \
+        mbuffer -s 128k -m 10M | \
+        pv | \
+        ssh backup sudo zfs receive -F space/mediacloud/vol_postgresql_data
+    ```
+
+11. Clean up pre-upgrade snapshots:
+
+    ```bash
+    backup$ sudo zfs destroy space/mediacloud/vol_postgresql_data@11_initial
+    backup$ sudo zfs destroy space/mediacloud/vol_postgresql_data@11_intermediate
+    backup$ sudo zfs destroy space/mediacloud/vol_postgresql_data@11_final
+
+    production$ sudo zfs destroy space/mediacloud/vol_postgresql_data@11_initial
+    production$ sudo zfs destroy space/mediacloud/vol_postgresql_data@11_intermediate
+    production$ sudo zfs destroy space/mediacloud/vol_postgresql_data@11_final
+    ```
diff --git a/doc/pycharm.markdown b/doc/pycharm.markdown
index 67051e8175..df10ad0dfb 100644
--- a/doc/pycharm.markdown
+++ b/doc/pycharm.markdown
@@ -229,11 +229,18 @@ Without the SQL schema dialect and data source configuration, PyCharm will compl
 
 ![](https://github.com/mediacloud/backend-docs-images/raw/master/docker-pycharm/schema-01-dialect-warning.png)
 
+To generate the latest version of the schema, navigate to the project root in your terminal and run `./dev/get_schema.sh`.
+
 To configure SQL schema dialect and data source:
 
-1. To generate the latest version of the schema, navigate to the project root in your terminal and run `./dev/get_schema.sh`.
+1. Symlink `apps/postgresql-server/schema/mediawords.sql` into your app's `.idea/` directory:
+
+      ```bash
+      cd apps/your_app/.idea/
+      ln -s ../../postgresql-server/schema/mediawords.sql .
+      ```
 
-2. In PyCharm, go to *Languages & Frameworks* -> *SQL Dialects*, set the *Project SQL Dialect:* to *PostgreSQL* and click *OK*:
+2. Go to *Languages & Frameworks* -> *SQL Dialects*, set the *Project SQL Dialect:* to *PostgreSQL* and click *OK*:
 
    ![](https://github.com/mediacloud/backend-docs-images/raw/master/docker-pycharm/schema-02-dialect-project.png)
 
@@ -245,7 +252,7 @@ To configure SQL schema dialect and data source:
 
    ![](https://github.com/mediacloud/backend-docs-images/raw/master/docker-pycharm/schema-04-data-source-ddl.png)
 
-5. Leave the default value of the *Name:* field intact and under *DDL Files*, add the `mediawords.sql` file (located in `postgresql-server/schema`) and click *OK*:
+5. Leave the default value of the *Name:* field intact and under *DDL Files*, add the `mediawords.sql` symlink located in the `.idea/` directory (you might need to click on the *Show Hidden Files and Directories* button first) and click *OK*:
 
    ![](https://github.com/mediacloud/backend-docs-images/raw/master/docker-pycharm/schema-05-data-source-mediawords.png)
 
diff --git a/doc/workflows.markdown b/doc/workflows.markdown
new file mode 100644
index 0000000000..45144830c4
--- /dev/null
+++ b/doc/workflows.markdown
@@ -0,0 +1,419 @@
+<!-- MEDIACLOUD-TOC-START -->
+
+Table of Contents
+=================
+
+   * [Workflows](#workflows)
+      * [Samples](#samples)
+         * [Retry parameters](#retry-parameters)
+         * [Activity interface](#activity-interface)
+         * [Activity interface with custom retries](#activity-interface-with-custom-retries)
+         * [Workflow interface](#workflow-interface)
+         * [Running a workflow](#running-a-workflow)
+            * [Asynchronously](#asynchronously)
+            * [Synchronously](#synchronously)
+      * [Tips &amp; tricks](#tips--tricks)
+         * [Name workflow (activity) interface as XYZWorkflow (<code>XYZActivities</code>), implementation as <code>XYZWorkflowImpl</code> (<code>XYZActivitiesImpl</code>)](#name-workflow-activity-interface-as-xyzworkflow-xyzactivities-implementation-as-xyzworkflowimpl-xyzactivitiesimpl)
+         * [Make activities idempotent](#make-activities-idempotent)
+         * [Limit activity invocations in a single workflow to 1000](#limit-activity-invocations-in-a-single-workflow-to-1000)
+         * [Limit the activity payload to 200 KB](#limit-the-activity-payload-to-200-kb)
+         * [Use positional arguments](#use-positional-arguments)
+         * [Make arguments serializable by encode_json()](#make-arguments-serializable-by-encode_json)
+         * [Use connect_to_db_or_raise() instead of <code>connect_to_db()</code>](#use-connect_to_db_or_raise-instead-of-connect_to_db)
+         * [Use stop_worker_faster() to stop local workers used in tests](#use-stop_worker_faster-to-stop-local-workers-used-in-tests)
+         * [Reuse WorkflowClient objects when possible](#reuse-workflowclient-objects-when-possible)
+      * [Links](#links)
+
+----
+<!-- MEDIACLOUD-TOC-END -->
+
+# Workflows
+
+
+## Samples
+
+
+### Retry parameters
+
+```python
+DEFAULT_RETRY_PARAMETERS = RetryParameters(
+
+    # InitialInterval is a delay before the first retry.
+    initial_interval=timedelta(seconds=1),
+
+    # BackoffCoefficient. Retry policies are exponential. The coefficient specifies how fast the retry interval is
+    # growing. The coefficient of 1 means that the retry interval is always equal to the InitialInterval.
+    backoff_coefficient=2,
+
+    # MaximumInterval specifies the maximum interval between retries. Useful for coefficients more than 1.
+    maximum_interval=timedelta(hours=2),
+
+    # MaximumAttempts specifies how many times to attempt to execute an Activity in the presence of failures. If this
+    # limit is exceeded, the error is returned back to the Workflow that invoked the Activity.
+
+    # We start off with a huge default retry count for each individual activity (1000 attempts * 2 hour max. interval
+    # = about a month worth of retrying) to give us time to detect problems, fix them, deploy fixes and let the workflow
+    # system just handle the rest without us having to restart workflows manually.
+    #
+    # Activities for which retrying too much doesn't make sense (e.g. due to the cost) set their own "maximum_attempts".
+    maximum_attempts=1000,
+
+    # NonRetryableErrorReasons allows you to specify errors that shouldn't be retried. For example retrying invalid
+    # arguments error doesn't make sense in some scenarios.
+    non_retryable_error_types=[
+
+        # Counterintuitively, we *do* want to retry not only on transient errors but also on programming and
+        # configuration ones too because on programming / configuration bugs we can just fix up some code or
+        # configuration, deploy the fixes and let the workflow system automagically continue on with the workflow
+        # without us having to dig out what exactly has failed and restart things.
+        #
+        # However, on "permanent" errors (the ones when some action decides that it just can't proceed with this
+        # particular input, e.g. process a story that does not exist) there's no point in retrying
+        # anything anymore.
+        McPermanentError.__name__,
+
+    ],
+)
+```
+
+
+### Activity interface
+
+```python
+class SampleActivities(object):
+
+    @activity_method(
+        task_queue=TASK_QUEUE,
+
+        # ScheduleToStart is the maximum time from a Workflow requesting Activity execution to a worker starting its
+        # execution. The usual reason for this timeout to fire is all workers being down or not being able to keep up
+        # with the request rate. We recommend setting this timeout to the maximum time a Workflow is willing to wait for
+        # an Activity execution in the presence of all possible worker outages.
+        schedule_to_start_timeout=None,
+
+        # StartToClose is the maximum time an Activity can execute after it was picked by a worker.
+        start_to_close_timeout=timedelta(seconds=60),
+
+        # ScheduleToClose is the maximum time from the Workflow requesting an Activity execution to its completion.
+        schedule_to_close_timeout=None,
+
+        # Heartbeat is the maximum time between heartbeat requests. See Long Running Activities.
+        # (https://docs.temporal.io/docs/concept-activities/#long-running-activities)
+        heartbeat_timeout=None,
+
+        retry_parameters=DEFAULT_RETRY_PARAMETERS,
+    )
+    async def sample_activity(self, stories_id: int) -> Optional[str]:
+        raise NotImplementedError
+```
+
+
+### Activity interface with custom retries
+
+```python
+class SampleActivities(object):
+
+    @activity_method(
+        task_queue=TASK_QUEUE,
+        schedule_to_start_timeout=None,
+        start_to_close_timeout=timedelta(seconds=60),
+        schedule_to_close_timeout=None,
+        heartbeat_timeout=None,
+        retry_parameters=dataclasses.replace(
+            DEFAULT_RETRY_PARAMETERS,
+
+            # Wait for a minute before trying again
+            initial_interval=timedelta(minutes=1),
+
+            # Hope for the server to resurrect in a week
+            maximum_interval=timedelta(weeks=1),
+
+            # Don't kill ourselves trying to hit a permanently dead server
+            maximum_attempts=50,
+        ),
+    )
+    async def another_sample_activity_with_custom_retries(self, stories_id: int) -> Optional[str]:
+        raise NotImplementedError
+```
+
+
+### Workflow interface
+
+```python
+class SampleWorkflow(object):
+
+    @workflow_method(task_queue=TASK_QUEUE)
+    async def sample_workflow_method(self, stories_id: int) -> None:
+        raise NotImplementedError
+```
+
+
+### Running a workflow
+
+
+#### Asynchronously
+
+"Fire and forget" about the workflow:
+
+```python
+from mediawords.workflow.client import workflow_client
+
+
+client = workflow_client()
+workflow: SampleWorkflow = client.new_workflow_stub(
+    cls=SampleWorkflow,
+    workflow_options=WorkflowOptions(workflow_id=str(stories_id)),
+)
+
+await WorkflowClient.start(workflow.sample_workflow_method, stories_id)
+```
+
+
+#### Synchronously
+
+Start a workflow and wait for it to complete:
+
+```python
+from mediawords.workflow.client import workflow_client
+
+
+client = workflow_client()
+workflow: SampleWorkflow = client.new_workflow_stub(
+    cls=SampleWorkflow,
+    workflow_options=WorkflowOptions(workflow_id=str(stories_id)),
+)
+
+result = await workflow.sample_workflow_method(stories_id)
+```
+
+
+## Tips & tricks
+
+
+### Name workflow (activity) interface as `XYZWorkflow` (`XYZActivities`), implementation as `XYZWorkflowImpl` (`XYZActivitiesImpl`)
+
+Temporal's webapp uses the interface's class name as the workflow name by default, so that way the workflow names look better and are more easily searchable.
+
+```python
+# Good!
+
+class KardashianActivities(object):
+
+    @activity_method(
+        task_queue=TASK_QUEUE,
+        start_to_close_timeout=timedelta(seconds=60),
+    )
+    async def add_new_kardashian(self) -> None:
+        # ...
+
+class KardashianActivitiesImpl(KardashianActivities):
+
+    async def add_new_kardashian(self) -> None:
+        # ...
+
+
+class KardashianWorkflow(object):
+
+    @workflow_method(task_queue=TASK_QUEUE)
+    async def keep_up_with_kardashians(self) -> None:
+        # ...
+
+class KardashianWorkflowImpl(KardashianWorkflow):
+
+    async def keep_up_with_kardashians(self) -> None:
+        # ...
+```
+
+
+### Make activities idempotent
+
+Temporal guarantees at-least-once activity invocations, so some activities might have to be rerun occasionally:
+
+```python
+# Bad!
+
+class KardashianActivitiesImpl(KardashianActivities):
+
+    async def add_new_kardashian(self) -> None:
+        db = connect_to_db_or_raise()
+
+        # If this activity gets run twice, we'll end up with two Kims in the
+        # "kardashians" table which is against our strategic goals
+        db.query("""
+            INSERT INTO kardashians (name, surname)
+            VALUES ('Kim', 'Kardashian')
+        """)
+```
+
+Therefore, activities need to be "ready" for getting run twice sometimes:
+
+```python
+# Good!
+
+class KardashianActivitiesImpl(KardashianActivities):
+
+    async def add_new_kardashian(self) -> None:
+        db = connect_to_db_or_raise()
+
+        # Here we're assuming that there's a unique index on (name, surname)
+        # and using the ON CONFLICT upsert:
+        # https://www.postgresql.org/docs/current/sql-insert.html#SQL-ON-CONFLICT
+        db.query("""
+            INSERT INTO kardashians (name, surname)
+            VALUES ('Kim', 'Kardashian')
+            ON CONFLICT (name, surname) DO NOTHING
+        """)
+```
+
+
+### Limit activity invocations in a single workflow to 1000
+
+While workflow count itself is largely unlimited, the history size (which is where activity invocations get logged) is [limited to 10 MB (soft limit) / 50 MB (hard limit)](https://github.com/temporalio/temporal/blob/v1.7.0/service/history/configs/config.go#L380-L381), and history count is limited to [10k (soft limit) / 50k (hard limit) entries](https://github.com/temporalio/temporal/blob/v1.7.0/service/history/configs/config.go#L382-L383).
+
+Given that an activity might get retried a few times, and those retries will end up in the workflow's history too, don't invoke too many activities in a single workflow run.
+
+Instead, go for **hierarchical workflows.** For example, if an activity fetches a URL, and you're planning on fetching 1 million URLs, you can make a parent workflow start 1000 child workflows (each of them fetching 1000 URLs) and wait for their completion.
+
+<!-- FIXME add an example -->
+<!-- FIXME ContinueAsNew once that becomes available in the Python SDK -->
+
+
+### Limit the activity payload to 200 KB
+
+Activity arguments get serialized into JSON, sent over the network and then unserialized, so passing around huge JSON payloads hurts performance. Also, payloads are visible in the web UI, so loading a huge JSON file in Temporal's webapp is not practical.
+
+Instead of passing around huge chunks of data in payloads, store it somewhere in the database.
+
+
+### Use positional arguments
+
+At the time of writing, the Python SDK is unable to serialize named arguments (`**kwargs`) and pass them to workflow / activity methods:
+
+```python
+# Bad!
+await workflow.transcribe_episode(stories_id=stories_id)
+```
+
+so positional arguments (`*args`) have to be used instead:
+
+```python
+# Good!
+await workflow.transcribe_episode(stories_id)
+```
+
+
+### Make arguments serializable by `encode_json()`
+
+Python SDK serializes arguments to workflow and individual activities with `encode_json()`, and the default `JSONEncoder` is [limited](https://docs.python.org/3/library/json.html#json.JSONEncoder) in what it's able to serialize:
+
+```python
+# Bad!
+
+class FancyObject(object):
+    def __init__(self, fancy_argument: int):
+        self.fancy_argument = fancy_argument
+
+class FancyActivities(object):
+
+    @activity_method(
+        task_queue=TASK_QUEUE,
+        start_to_close_timeout=timedelta(seconds=60),
+    )
+    async def fancy_activity(self, fancy: FancyObject) -> bool:
+        # <...>
+```
+
+Instead, opt for simple dicts:
+
+```python
+# Good!
+
+from typing import Dict, Any
+
+class FancyObject(object):
+    def __init__(self, fancy_argument: int):
+        self.fancy_argument = fancy_argument
+
+    def to_dict(self) -> Dict[str, Any]:
+        return {
+            'fancy_argument': self.fancy_argument,
+        }
+
+    @classmethod
+    def from_dict(cls, input_dict: Dict[str, Any]) -> 'FancyObject':
+        return cls(fancy_argument=input_dict['fancy_argument'])
+
+class FancyActivities(object):
+
+    @activity_method(
+        task_queue=TASK_QUEUE,
+        start_to_close_timeout=timedelta(seconds=60),
+    )
+    async def fancy_activity(self, fancy: Dict[str, Any]) -> bool:
+        # Convert back to an object
+        fancy = FancyObject.from_dict(fancy)
+        # <...>
+```
+
+or define a new `typing` type to make it more obvious what the activity method is supposed to find in the argument dictionary:
+
+```python
+# Better (somewhat)!
+
+from typing import Dict, Any
+
+FancyObjectDict = Dict[str, Any]
+
+class FancyObject(object):
+    def __init__(self, fancy_argument: int):
+        self.fancy_argument = fancy_argument
+
+    def to_dict(self) -> FancyObjectDict:
+        return {
+            'fancy_argument': self.fancy_argument,
+        }
+
+    @classmethod
+    def from_dict(cls, input_dict: FancyObjectDict) -> 'FancyObject':
+        return cls(fancy_argument=input_dict['fancy_argument'])
+
+class FancyActivities(object):
+
+    @activity_method(
+        task_queue=TASK_QUEUE,
+        start_to_close_timeout=timedelta(seconds=60),
+    )
+    async def fancy_activity(self, fancy: FancyObjectDict) -> bool:
+        # Convert back to an object
+        fancy = FancyObject.from_dict(fancy)
+        # <...>
+```
+
+
+### Use `connect_to_db_or_raise()` instead of `connect_to_db()`
+
+By default, `connect_to_db()` will attempt connecting to the database quite a few times, and if it fails to do so, it will call `fatal_error()` thus stopping the whole application that has called the function.
+
+Temporal implements retries itself, plus it's not beneficial to quit the worker on database connection issues (as the worker then should continue on retrying), so instead of `connect_to_db()` go for `connect_to_db_or_raise()` which attempts connecting to PostgreSQL only once, and raises a simple exception on failures instead of stopping the whole application.
+
+
+### Use `stop_worker_faster()` to stop local workers used in tests
+
+The default implementation of `worker.stop()` waits for a whole 5 seconds between attempts to stop all the worker threads. Our own hack implemented in `stop_worker_faster()` instead checks every 0.5 seconds whether the workers have managed to stop.
+
+This is useful in tests in which we run local workers and want to stop them afterwards.
+
+
+### Reuse `WorkflowClient` objects when possible
+
+Avoid creating new `WorkflowClient` objects too often, as ["it is a heavyweight object that establishes persistent TCP connections"](https://github.com/uber/cadence/issues/2528#issuecomment-530894674).
+
+
+## Links
+
+* [Main Temporal website](https://temporal.io/)
+* [Temporal Python SDK](https://github.com/firdaus/temporal-python-sdk)
+    * [Tests with many usage samples](https://github.com/firdaus/temporal-python-sdk/tree/master/tests)
+* ["Workflows in Python using Temporal"](https://onepointzero.app/workflows-in-python-using-temporal/), a blog post by the author of the Python SDK with many examples
+* [Workflow samples in Go](https://github.com/temporalio/samples-go), many of which are adaptable to Python
+    * [Mutex workflow sample](https://github.com/temporalio/samples-go/tree/master/mutex)
diff --git a/provision/roles/docker/tasks/iptables.yml b/provision/roles/docker/tasks/iptables.yml
index aa1c1b858f..c374ba04b4 100644
--- a/provision/roles/docker/tasks/iptables.yml
+++ b/provision/roles/docker/tasks/iptables.yml
@@ -161,6 +161,34 @@
     - docker
     - iptables
 
+- name: Deny connections to Temporal webapp
+  iptables:
+    # Insert before RETURN
+    action: insert
+    chain: DOCKER-USER
+    protocol: tcp
+    destination_port: 8088
+    jump: DROP
+  become: true
+  become_user: root
+  tags:
+    - docker
+    - iptables
+
+- name: Deny connections to Temporal Grafana
+  iptables:
+    # Insert before RETURN
+    action: insert
+    chain: DOCKER-USER
+    protocol: tcp
+    destination_port: 3000
+    jump: DROP
+  become: true
+  become_user: root
+  tags:
+    - docker
+    - iptables
+
 - name: Save IPv4 rules
   community.general.iptables_state:
     ip_version: ipv4